165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*      Perl-Compatible Regular Expressions       *
365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE is a library of functions to support regular expressions whose syntax
665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand semantics are as close as possible to those of the Perl 5 language.
765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                       Written by Philip Hazel
965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           Copyright (c) 1997-2014 University of Cambridge
1065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich-----------------------------------------------------------------------------
1265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichRedistribution and use in source and binary forms, with or without
1365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmodification, are permitted provided that the following conditions are met:
1465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    * Redistributions of source code must retain the above copyright notice,
1665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      this list of conditions and the following disclaimer.
1765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    * Redistributions in binary form must reproduce the above copyright
1965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      notice, this list of conditions and the following disclaimer in the
2065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      documentation and/or other materials provided with the distribution.
2165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
2265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    * Neither the name of the University of Cambridge nor the names of its
2365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      contributors may be used to endorse or promote products derived from
2465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      this software without specific prior written permission.
2565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
2665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
3065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
3165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
3265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
3465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
3565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPOSSIBILITY OF SUCH DAMAGE.
3765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich-----------------------------------------------------------------------------
3865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
3965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* This module contains the external function pcre_compile(), along with
supporting internal functions that are not used by other modules. */
4365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef HAVE_CONFIG_H
4665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "config.h"
4765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
4865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define NLBLOCK cd             /* Block containing newline information */
5065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PSSTART start_pattern  /* Field containing pattern start */
5165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PSEND   end_pattern    /* Field containing pattern end */
5265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
5365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_internal.h"
5465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
5565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
library. We do not need to select pcre16_printint.c specially, because the
COMPILE_PCREx macro will already be appropriately set. */
6065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
6165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG
6265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* pcre_printint.c should not include any headers */
6365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PCRE_INCLUDED
6465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_printint.c"
6565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#undef PCRE_INCLUDED
6665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
6765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
6865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Macro for setting individual bits in class bitmaps. SETBIT(a,b) sets bit
number b in the byte array a, eight bits per byte with the least significant
bit first. The array argument and the whole expansion are parenthesized so
that expression arguments such as "map + 1" index correctly and the macro can
be used inside a larger expression. Note that b is evaluated twice, so it
must not have side effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)&7)))
7265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Maximum length value to check against when making sure that the integer that
holds the compiled pattern length does not overflow. We make it a bit less than
INT_MAX to allow for adding in group terminating bytes, so that we don't have
to check them every time. */

#define OFLOW_MAX (INT_MAX - 20)
7965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Definitions to allow mutual recursion */
8165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
8365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
8465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    const pcre_uint32 *, unsigned int);
8565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
8765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
8865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *,
8965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    compile_data *, int *);
9065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/*************************************************
*      Code parameters and static tables         *
*************************************************/

/* This value specifies the size of stack workspace that is used during the
first pre-compile phase that determines how much memory is required. The regex
is partly compiled into this space, but the compiled parts are discarded as
soon as they can be, so that hopefully there will never be an overrun. The code
does, however, check for an overrun. The largest amount I've seen used is 218,
so this number is very generous.

The same workspace is used during the second, actual compile phase for
remembering forward references to groups so that they can be filled in at the
end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
is 4 there is plenty of room for most patterns. However, the memory can get
filled up by repetitions of forward references, for example patterns like
/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so
that the workspace is expanded using malloc() in this situation. The value
below is therefore a minimum, and we put a maximum on it for safety. The
minimum is now also defined in terms of LINK_SIZE so that the use of malloc()
kicks in at the same number of forward references in all cases. */

#define COMPILE_WORK_SIZE (2048*LINK_SIZE)
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
11765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* This value determines the size of the initial vector that is used for
remembering named groups during the pre-compile. It is allocated on the stack,
but if it is too small, it is expanded using malloc(), in a similar way to the
workspace. The value is the number of slots in the list. */

#define NAMED_GROUP_LIST_SIZE  20
12465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* The overrun tests check for a slightly smaller size so that they detect the
overrun before it actually does run off the end of the data block. */

#define WORK_SIZE_SAFETY_MARGIN (100)
12965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Private flags added to firstchar and reqchar. These are single-bit values
that can be combined. */

#define REQ_CASELESS    (1 << 0)        /* Indicates caselessness */
#define REQ_VARY        (1 << 1)        /* Reqchar followed non-literal item */

/* Negative values for the firstchar and reqchar flags */

#define REQ_UNSET       (-2)
#define REQ_NONE        (-1)
13765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Repeated character flags. The constant keeps its long type; the suffix is
written in upper case so that it cannot be misread as the digit 1. */

#define UTF_LENGTH     0x10000000L      /* The char contains its length. */
14165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table for handling escaped characters in the range '0'-'z'. Positive returns
14365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare simple data values; negative values are for special things like \d and so
14465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichon. Zero means further processing is needed (for things like \x), or the escape
14565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis invalid. */
14665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC
14865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "normal" table for ASCII systems or for EBCDIC systems running
15065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin UTF-8 mode. */
15165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
15265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const short int escapes[] = {
15365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
15465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
15565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
15665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
15765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
15865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_COLON,              CHAR_SEMICOLON,
15965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
16065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
16165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_COMMERCIAL_AT,      -ESC_A,
16265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_B,                  -ESC_C,
16365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_D,                  -ESC_E,
16465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       -ESC_G,
16565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_H,                  0,
16665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       -ESC_K,
16765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
16865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_N,                  0,
16965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_P,                  -ESC_Q,
17065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_R,                  -ESC_S,
17165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
17265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_V,                  -ESC_W,
17365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_X,                  0,
17465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
17565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
17665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
17765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     CHAR_GRAVE_ACCENT,       7,
17865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_b,                  0,
17965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_d,                  ESC_e,
18065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     ESC_f,                   0,
18165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_h,                  0,
18265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       -ESC_k,
18365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
18465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     ESC_n,                   0,
18565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_p,                  0,
18665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     ESC_r,                   -ESC_s,
18765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     ESC_tee,                 0,
18865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_v,                  -ESC_w,
18965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     0,                       0,
19065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     -ESC_z
19165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
19265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
19465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
19665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const short int escapes[] = {
19865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
19965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
20065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  58 */     0,     0,    '!',     '$',    '*',   ')',    ';',    '~',
20165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  60 */   '-',   '/',      0,       0,      0,     0,      0,      0,
20265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',
20365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
20465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
20565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
20665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
20765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,
20865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
20965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
21065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
21165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
21265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
21365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
21465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
21565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P,
21665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
21765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
21865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
21965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
22065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
22165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
22265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
22365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
22465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
searched linearly. Put all the names into a single string, in order to reduce
the number of relocations when a shared library is dynamically linked. The
string is built from string macros so that it works in UTF-8 mode on EBCDIC
platforms. */

/* One table entry per verb name: the length of the name, and the opcode to
use without and with an argument. */

typedef struct verbitem {
  int   len;                 /* Length of verb name */
  int   op;                  /* Op when no arg, or -1 if arg mandatory */
  int   op_arg;              /* Op when arg present, or -1 if not allowed */
} verbitem;
23665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
23765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char verbnames[] =
23865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  "\0"                       /* Empty name is a shorthand for MARK */
23965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_MARK0
24065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_ACCEPT0
24165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_COMMIT0
24265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_F0
24365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_FAIL0
24465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_PRUNE0
24565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_SKIP0
24665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_THEN;
24765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
24865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const verbitem verbs[] = {
24965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, -1,        OP_MARK },
25065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 4, -1,        OP_MARK },
25165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 6, OP_ACCEPT, -1 },
25265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 6, OP_COMMIT, -1 },
25365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, OP_FAIL,   -1 },
25465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 4, OP_FAIL,   -1 },
25565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 5, OP_PRUNE,  OP_PRUNE_ARG },
25665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 4, OP_SKIP,   OP_SKIP_ARG  },
25765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 4, OP_THEN,   OP_THEN_ARG  }
25865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
25965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const int verbcount = sizeof(verbs)/sizeof(verbitem);
26165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in
26465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichanother regex library. */
26565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar sub_start_of_word[] = {
26765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
26865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' };
26965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
27065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar sub_end_of_word[] = {
27165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
27265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w,
27365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_RIGHT_PARENTHESIS, '\0' };
27465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
27565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
27665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Tables of names of POSIX character classes and their lengths. The names are
27765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnow all in a single string, to reduce the number of relocations when a shared
27865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlibrary is dynamically loaded. The list of lengths is terminated by a zero
27965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength entry. The first three must be alpha, lower, upper, as this is assumed
28065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor handling case independence. The indices for graph, print, and punct are
28165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichneeded, so identify them. */
28265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
28365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char posix_names[] =
28465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
28565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
28665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
28765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  STRING_word0  STRING_xdigit;
28865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
28965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 posix_name_lengths[] = {
29065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
29165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Zero-based positions of graph, print, and punct in the list of POSIX class
names. */

#define PC_GRAPH  8
#define PC_PRINT  9
#define PC_PUNCT 10
29565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
29665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
29765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table of class bit maps for each POSIX class. Each class is formed from a
29865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbase map, with an optional addition or removal of another map. Then, for some
29965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclasses, there is some additional tweaking: for [:blank:] the vertical space
30065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacters are removed, and for [:alpha:] and [:alnum:] the underscore
30165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacter is removed. The triples in the table consist of the base map offset,
30265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsecond map offset or -1 if no second map, and a non-negative value for map
30365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichaddition or a negative value for map subtraction (if there are two maps). The
30465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichabsolute value of the third field has these meanings: 0 => no tweaking, 1 =>
30565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichremove vertical space characters, 2 => remove underscore. */
30665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
30765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const int posix_class_maps[] = {
30865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_word,  cbit_digit, -2,             /* alpha */
30965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_lower, -1,          0,             /* lower */
31065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_upper, -1,          0,             /* upper */
31165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_word,  -1,          2,             /* alnum - word without underscore */
31265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_print, cbit_cntrl,  0,             /* ascii */
31365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_space, -1,          1,             /* blank - a GNU extension */
31465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_cntrl, -1,          0,             /* cntrl */
31565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_digit, -1,          0,             /* digit */
31665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_graph, -1,          0,             /* graph */
31765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_print, -1,          0,             /* print */
31865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_punct, -1,          0,             /* punct */
31965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_space, -1,          0,             /* space */
32065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_word,  -1,          0,             /* word - a Perl extension */
32165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cbit_xdigit,-1,          0              /* xdigit */
32265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
32365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by
Unicode property escapes. */
32665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
32765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
32865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PNd[]  = {
32965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
33065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
33165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pNd[]  = {
33265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
33365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
33465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXsp[] = {
33565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
33665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
33765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXsp[] = {
33865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
33965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
34065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXwd[] = {
34165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
34265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
34365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXwd[] = {
34465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
34565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
34665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
34765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *substitutes[] = {
34865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PNd,           /* \D */
34965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pNd,           /* \d */
35065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PXsp,          /* \S */   /* Xsp is Perl space, but from 8.34, Perl */
35165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pXsp,          /* \s */   /* space and POSIX space are the same. */
35265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PXwd,          /* \W */
35365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pXwd           /* \w */
35465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
35565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
35665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The POSIX class substitutes must be in the order of the POSIX class names,
35765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdefined above, and there are both positive and negative cases. NULL means no
35865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgeneral substitute of a Unicode property escape (\p or \P). However, for some
35965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPOSIX classes (e.g. graph, print, punct) a special property code is compiled
36065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdirectly. */
36165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
36265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pL[] =   {
36365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
36465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
36565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pLl[] =  {
36665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
36765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
36865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pLu[] =  {
36965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
37065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
37165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXan[] = {
37265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
37365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
37465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_h[] =    {
37565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_h, '\0' };
37665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXps[] = {
37765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
37865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
37965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PL[] =   {
38065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
38165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
38265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PLl[] =  {
38365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
38465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
38565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PLu[] =  {
38665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
38765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
38865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXan[] = {
38965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
39065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
39165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_H[] =    {
39265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_H, '\0' };
39365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXps[] = {
39465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
39565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
39665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
39765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *posix_substitutes[] = {
39865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pL,            /* alpha */
39965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pLl,           /* lower */
40065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pLu,           /* upper */
40165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pXan,          /* alnum */
40265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ascii */
40365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_h,             /* blank */
40465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* cntrl */
40565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pNd,           /* digit */
40665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* graph */
40765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* print */
40865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* punct */
40965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pXps,          /* space */   /* Xps is POSIX space, but from 8.34 */
41065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_pXwd,          /* word  */   /* Perl and POSIX space are the same */
41165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* xdigit */
41265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Negated cases */
41365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PL,            /* ^alpha */
41465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PLl,           /* ^lower */
41565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PLu,           /* ^upper */
41665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PXan,          /* ^alnum */
41765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ^ascii */
41865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_H,             /* ^blank */
41965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ^cntrl */
42065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PNd,           /* ^digit */
42165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ^graph */
42265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ^print */
42365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* ^punct */
42465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PXps,          /* ^space */  /* Xps is POSIX space, but from 8.34 */
42565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string_PXwd,          /* ^word */   /* Perl and POSIX space are the same */
42665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL                  /* ^xdigit */
42765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
42865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
42965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
43065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Two-level stringification: STRING(a) turns its argument into a string
literal exactly as written, while XSTRING(s) lets the argument be
macro-expanded first, so that XSTRING(MAX_NAME_SIZE) yields the numeric value
as text. */

#define STRING(a)  # a
#define XSTRING(s) STRING(s)

/* The texts of compile-time error messages. These are "char *" because they
are passed to the outside world. Do not ever re-use any error number, because
they are documented. Always add a new error instead. Messages marked DEAD below
are no longer used.

This used to be a table of strings, but in order to reduce the number of
relocations needed when a shared library is loaded dynamically, it is now one
long string. A table of offsets cannot be used, because the lengths of inserts
such as XSTRING(MAX_NAME_SIZE) are not known. Instead, the code simply counts
through to the message it wants - this isn't a performance issue because these
strings are used only when there is a compilation error.

Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0, which can be detected when
counting through. The numeric comments give the error number of the message
that follows them. */

static const char error_texts[] =
  "no error\0"
  "\\ at end of pattern\0"
  "\\c at end of pattern\0"
  "unrecognized character follows \\\0"
  "numbers out of order in {} quantifier\0"
  /* 5 */
  "number too big in {} quantifier\0"
  "missing terminating ] for character class\0"
  "invalid escape sequence in character class\0"
  "range out of order in character class\0"
  "nothing to repeat\0"
  /* 10 */
  "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
  "internal error: unexpected repeat\0"
  "unrecognized character after (? or (?-\0"
  "POSIX named classes are supported only within a class\0"
  "missing )\0"
  /* 15 */
  "reference to non-existent subpattern\0"
  "erroffset passed as NULL\0"
  "unknown option bit(s) set\0"
  "missing ) after comment\0"
  "parentheses nested too deeply\0"  /** DEAD **/
  /* 20 */
  "regular expression is too large\0"
  "failed to get memory\0"
  "unmatched parentheses\0"
  "internal error: code overflow\0"
  "unrecognized character after (?<\0"
  /* 25 */
  "lookbehind assertion is not fixed length\0"
  "malformed number or name after (?(\0"
  "conditional group contains more than two branches\0"
  "assertion expected after (?(\0"
  "(?R or (?[+-]digits must be followed by )\0"
  /* 30 */
  "unknown POSIX class name\0"
  "POSIX collating elements are not supported\0"
  "this version of PCRE is compiled without UTF support\0"
  "spare error\0"  /** DEAD **/
  "character value in \\x{} or \\o{} is too large\0"
  /* 35 */
  "invalid condition (?(0)\0"
  "\\C not allowed in lookbehind assertion\0"
  "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
  "number after (?C is > 255\0"
  "closing ) for (?C expected\0"
  /* 40 */
  "recursive call could loop indefinitely\0"
  "unrecognized character after (?P\0"
  "syntax error in subpattern name (missing terminator)\0"
  "two named subpatterns have the same name\0"
  "invalid UTF-8 string\0"
  /* 45 */
  "support for \\P, \\p, and \\X has not been compiled\0"
  "malformed \\P or \\p sequence\0"
  "unknown property name after \\P or \\p\0"
  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
  "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
  /* 50 */
  "repeated subpattern is too long\0"    /** DEAD **/
  "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
  "internal error: overran compiling workspace\0"
  "internal error: previously-checked referenced subpattern not found\0"
  "DEFINE group contains more than one branch\0"
  /* 55 */
  "repeating a DEFINE group is not allowed\0"  /** DEAD **/
  "inconsistent NEWLINE options\0"
  "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
  "a numbered reference must not be zero\0"
  "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
  /* 60 */
  "(*VERB) not recognized or malformed\0"
  "number is too big\0"
  "subpattern name expected\0"
  "digit expected after (?+\0"
  "] is an invalid data character in JavaScript compatibility mode\0"
  /* 65 */
  "different names for subpatterns of the same number are not allowed\0"
  "(*MARK) must have an argument\0"
  "this version of PCRE is not compiled with Unicode property support\0"
  "\\c must be followed by an ASCII character\0"
  "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
  /* 70 */
  "internal error: unknown opcode in find_fixedlength()\0"
  "\\N is not supported in a class\0"
  "too many forward references\0"
  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
  "invalid UTF-16 string\0"
  /* 75 */
  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
  "character value in \\u.... sequence is too large\0"
  "invalid UTF-32 string\0"
  "setting UTF is disabled by the application\0"
  "non-hex character in \\x{} (closing brace missing?)\0"
  /* 80 */
  "non-octal character in \\o{} (closing brace missing?)\0"
  "missing opening brace after \\o\0"
  "parentheses are too deeply nested\0"
  "invalid range in character class\0"
  "group name must start with a non-digit\0"
  /* 85 */
  "parentheses are too deeply nested (stack check)\0"
  "digits missing in \\x{} or \\o{}\0"
  ;
55465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Table to identify digits and hex digits. This is used when compiling
patterns. Note that the tables in chartables are dependent on the locale, and
may mark arbitrary characters as digits - but the PCRE compiling code expects
to handle only 0-9, a-f, and A-F as digits when compiling. That is why we have
a private table here. It costs 256 bytes, but it is a lot faster than doing
character value tests (at least in some simple cases I timed), and in some
applications one wants PCRE to compile efficiently as well as match
efficiently.

For convenience, we use the same bit definitions as in chartables:

  0x04   decimal digit
  0x08   hexadecimal digit

Then we can use ctype_digit and ctype_xdigit in the code. */
57065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Test whether x is a decimal digit, i.e. lies between CHAR_0 and CHAR_9
inclusive. Using a simple comparison for decimal numbers rather than a memory
read is much faster, and the resulting code is simpler (the compiler turns it
into a subtraction and unsigned comparison). The argument is evaluated twice,
so it must not have side effects. */

#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
57665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
57765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC
57865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
57965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
58065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichUTF-8 mode. */
58165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
58265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 digitab[] =
58365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
58465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
58565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
58665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
58765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
58865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
58965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
59065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  */
59165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
59265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  @ - G  */
59365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H - O  */
59465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  P - W  */
59565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  X - _  */
59665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  ` - g  */
59765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h - o  */
59865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  p - w  */
59965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
60065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
60165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
60265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
60365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
60465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
60565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
60665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
60765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
60865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
60965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
61065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
61165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
61265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
61365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
61465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
61565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
61665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
61765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
61865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
61965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
62065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
62165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 digitab[] =
62265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
62365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
62465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
62565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 10 */
62665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31    */
62765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  32- 39 20 */
62865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47    */
62965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 30 */
63065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63    */
63165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
63265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
63365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
63465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
63565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
63665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
63765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
63865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "     */
63965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g  80 */
64065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143    */
64165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p  90 */
64265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159    */
64365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x  A0 */
64465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175    */
64565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 B0 */
64665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191    */
64765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  { - G  C0 */
64865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207    */
64965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  } - P  D0 */
65065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223    */
65165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  \ - X  E0 */
65265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239    */
65365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
65465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */
65565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
65665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
65765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
65865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
65965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
66065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
66165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  32- 39 */
66265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47 */
66365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 */
66465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63 */
66565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
66665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
66765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
66865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
66965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
67065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
67165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
67265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "  */
67365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g  */
67465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143 */
67565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p  */
67665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159 */
67765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x  */
67865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175 */
67965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 */
68065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
68165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  { - G  */
68265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207 */
68365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  } - P  */
68465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223 */
68565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /*  \ - X  */
68665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239 */
68765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
68865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
68965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
69065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
69165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
69265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible
69365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent character-type opcodes. The left-hand (repeated) opcode is
69465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichused to select the row, and the right-hand opcode is use to select the column.
69565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA value of 1 means that auto-possessification is OK. For example, the second
69665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue in the first row means that \D+\d can be turned into \D++\d.
69765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
69865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThe Unicode property types (\P and \p) have to be present to fill out the table
69965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause of what their opcode values are, but the table values should always be
70065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichzero because property types are handled separately in the code. The last four
70165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcolumns apply to items that cannot be repeated, so there is no need to have
70265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
70365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
70465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
70565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
70665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
70765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
70865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 autoposstab[APTROWS][APTCOLS] = {
70965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
71065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
71165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
71265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
71365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
71465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
71565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
71665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
71765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
71865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
71965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
72065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
72165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
72265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
72365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
72465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
72565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
72665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
72765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
72865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
72965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
73065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible
73165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
73265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichleft-hand (repeated) opcode is used to select the row, and the right-hand
73365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopcode is used to select the column. The values are as follows:
73465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
73565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0   Always return FALSE (never auto-possessify)
73665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  1   Character groups are distinct (possessify if both are OP_PROP)
73765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  2   Check character categories in the same group (general or particular)
73865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
73965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
74065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  4   Check left general category vs right particular category
74165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  5   Check right general category vs left particular category
74265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
74365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  6   Left alphanum vs right general category
74465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  7   Left space vs right general category
74565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  8   Left word vs right general category
74665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
74765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  9   Right alphanum vs left general category
74865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 10   Right space vs left general category
74965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 11   Right word vs left general category
75065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
75165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12   Left alphanum vs right particular category
75265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13   Left space vs right particular category
75365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14   Left word vs right particular category
75465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
75565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 15   Right alphanum vs left particular category
75665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 16   Right space vs left particular category
75765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 17   Right word vs left particular category
75865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
75965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
76065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 propposstab[PT_TABSIZE][PT_TABSIZE] = {
76165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
76265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
76365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
76465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
76565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
76665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
76765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
76865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
76965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
77065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
77165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
77265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
77365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
77465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
77565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible
77665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
77765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichspecifies a general category and the other specifies a particular category. The
77865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrow is selected by the general category and the column by the particular
77965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcategory. The value is 1 if the particular category is not part of the general
78065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcategory. */
78165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
78265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 catposstab[7][30] = {
78365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
78465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
78565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
78665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
78765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
78865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
78965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
79065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
79165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
79265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
79365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
79465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicha general or particular category. The properties in each row are those
79565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthat apply to the character set in question. Duplication means that a little
79665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunnecessary work is done when checking, but this keeps things much simpler
79765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they can all use the same code. For more details see the comment where
79865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis table is used.
79965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
80065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichNote: SPACE and PXSPACE used to be different because Perl excluded VT from
80165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"space", but from Perl 5.18 it's included, so both categories are treated the
80265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsame here. */
80365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
80465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 posspropstab[3][4] = {
80565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
80665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
80765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
80865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
80965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
81065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used when converting repeating opcodes into possessified
81165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichversions as a result of an explicit possessive quantifier such as ++. A zero
81265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue means there is no possessified version - in those cases the item in
81365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichquestion must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
81465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause all relevant opcodes are less than that. */
81565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
81665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 opcode_possessify[] = {
81765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
81865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */
81965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
82065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* NOTI */
82165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSSTAR, 0,           /* STAR, MINSTAR */
82265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSPLUS, 0,           /* PLUS, MINPLUS */
82365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSQUERY, 0,          /* QUERY, MINQUERY */
82465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSUPTO, 0,           /* UPTO, MINUPTO */
82565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* EXACT */
82665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */
82765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
82865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSSTARI, 0,          /* STARI, MINSTARI */
82965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
83065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
83165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
83265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* EXACTI */
83365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */
83465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
83565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
83665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
83765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
83865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
83965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* NOTEXACT */
84065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
84165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
84265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
84365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
84465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
84565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
84665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* NOTEXACTI */
84765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
84865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
84965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
85065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
85165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
85265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
85365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                       /* TYPEEXACT */
85465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
85565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
85665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
85765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
85865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
85965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
86065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */
86165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
86265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
86365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0,                    /* REF, REFI */
86465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0,                    /* DNREF, DNREFI */
86565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0, 0                     /* RECURSE, CALLOUT */
86665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich};
86765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
86865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
86965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
87065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
87165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*            Find an error text                  *
87265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
87365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
87465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The error texts are now all in one long string, to save on relocations. As
87565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsome of the text is of unknown length, we can't use a table of offsets.
87665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichInstead, just count through the strings. This is not a performance issue
87765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause it happens only when there has been a compilation error.
87865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
87965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArgument:   the error number
88065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:    pointer to the error string
88165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
88265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
88365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char *
88465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_error_text(int n)
88565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
88665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst char *s = error_texts;
88765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (; n > 0; n--)
88865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
88965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while (*s++ != CHAR_NULL) {};
89065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (*s == CHAR_NULL) return "Error text not found (please report)";
89165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
89265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn s;
89365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
89465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
89565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
89665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
89765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
89865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*           Expand the workspace                 *
89965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
90065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
90165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called during the second compiling phase, if the number of
90265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichforward references fills the existing workspace, which is originally a block on
90365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe stack. A larger block is obtained from malloc() unless the ultimate limit
90465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhas been reached or the increase will be rather small.
90565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
90665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArgument: pointer to the compile data block
90765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:  0 if all went well, else an error number
90865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
90965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
91065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
91165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexpand_workspace(compile_data *cd)
91265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
91365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *newspace;
91465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint newsize = cd->workspace_size * 2;
91565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
91665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX;
91765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||
91865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)
91965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return ERR72;
92065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
92165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnewspace = (PUBL(malloc))(IN_UCHARS(newsize));
92265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newspace == NULL) return ERR21;
92365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmemcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar));
92465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace);
92565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size > COMPILE_WORK_SIZE)
92665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (PUBL(free))((void *)cd->start_workspace);
92765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = newspace;
92865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->workspace_size = newsize;
92965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn 0;
93065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
93165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
93265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
93365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
93465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
93565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*            Check for counted repeat            *
93665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
93765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
93865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when a '{' is encountered in a place where it might
93965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart a quantifier. It looks ahead to see if it really is a quantifier or not.
94065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIt is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
94165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhere the ddds are digits.
94265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
94365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
94465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p         pointer to the first char after '{'
94565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
94665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:    TRUE or FALSE
94765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
94865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
94965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
95065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_counted_repeat(const pcre_uchar *p)
95165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
95265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (!IS_DIGIT(*p)) return FALSE;
95365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichp++;
95465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p)) p++;
95565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
95665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
95765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p++ != CHAR_COMMA) return FALSE;
95865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
95965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
96065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (!IS_DIGIT(*p)) return FALSE;
96165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichp++;
96265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p)) p++;
96365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
96465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (*p == CHAR_RIGHT_CURLY_BRACKET);
96565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
96665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
96765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
96865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
96965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
97065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*            Handle escapes                      *
97165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
97265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* This function is called when a \ has been encountered. It returns 0 for a
data character, which is placed in chptr (escapes whose table entry is a
positive value are turned into data characters in this way), or a positive
value for a special escape sequence. A backreference to group n is returned as
negative n. When UTF-8 is enabled, a data character value greater than 255 may
be returned in chptr. On entry, ptr is pointing at the \. On exit, it is on the
final character of the escape sequence.
97965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
98065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
98165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptrptr         points to the pattern position pointer
98265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  chptr          points to a returned data character
98365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr   points to the errorcode variable
98465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bracount       number of previous extracting brackets
98565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options        the options bits
98665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  isclass        TRUE if inside a character class
98765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
98865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:         zero => a data character
98965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 positive => a special escape sequence
99065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 negative => a back reference
99165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 on error, errorcodeptr is set
99265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
99365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
99465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
99565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
99665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int bracount, int options, BOOL isclass)
99765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
99865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF16 has the same value as PCRE_UTF8. */
99965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0;
100065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr + 1;
100165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 c;
100265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint escape = 0;
100365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i;
100465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
100565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichGETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
100665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr--;                            /* Set pointer back to the last byte */
100765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
100865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If backslash is at the end of the pattern, it's an error. */
100965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
101065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_NULL) *errorcodeptr = ERR1;
101165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
101265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
101365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin a table. A non-zero result is something that can be returned immediately.
101465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOtherwise further processing may be required. */
101565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
101665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC  /* ASCII/UTF-8 coding */
101765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Not alphanumeric */
101865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (c < CHAR_0 || c > CHAR_z) {}
101965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if ((i = escapes[c - CHAR_0]) != 0)
102065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
102165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
102265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else           /* EBCDIC coding */
102365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Not alphanumeric */
102465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
102565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
102665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
102765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
102865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Escapes that need further processing, or are illegal. */
102965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
103065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse
103165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
103265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uchar *oldptr;
103365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL braced, negated, overflow;
103465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int s;
103565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
103665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch (c)
103765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
103865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* A number of Perl escapes are not handled by PCRE. We give an explicit
103965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    error. */
104065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
104165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_l:
104265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_L:
104365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *errorcodeptr = ERR37;
104465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
104565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
104665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_u:
104765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
104865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
104965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* In JavaScript, \u must be followed by four hexadecimal numbers.
105065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      Otherwise it is a lowercase u letter. */
105165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
105265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
105365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
105465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
105565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
105665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = 0;
105765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        for (i = 0; i < 4; ++i)
105865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
105965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          register pcre_uint32 cc = *(++ptr);
106065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC  /* ASCII/UTF-8 coding */
106165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
106265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
106365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else           /* EBCDIC coding */
106465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
106565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
106665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
106765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
106865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
106965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
107065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c > (utf ? 0x10ffffU : 0xffU))
107165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
107265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c > (utf ? 0x10ffffU : 0xffffU))
107365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
107465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf && c > 0x10ffffU)
107565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
107665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
107765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR76;
107865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
107965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
108065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
108165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
108265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
108365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR37;
108465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
108565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
108665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_U:
108765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In JavaScript, \U is an uppercase U letter. */
108865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
108965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
109065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
109165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In a character class, \g is just a literal "g". Outside a character
109265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class, \g must be followed by one of a number of specific things:
109365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
109465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (1) A number, either plain or braced. If positive, it is an absolute
109565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    backreference. If negative, it is a relative backreference. This is a Perl
109665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    5.10 feature.
109765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
109865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
109965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    is part of Perl's movement towards a unified syntax for back references. As
110065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    this is synonymous with \k{name}, we fudge it up by pretending it really
110165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    was \k.
110265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
110365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (3) For Oniguruma compatibility we also support \g followed by a name or a
110465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    number either in angle brackets or in single quotes. However, these are
110565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (possibly recursive) subroutine calls, _not_ backreferences. Just return
110665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the ESC_g code (cf \k). */
110765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
110865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_g:
110965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (isclass) break;
111065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
111165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
111265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      escape = ESC_g;
111365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
111465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
111565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
111665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle the Perl-compatible cases */
111765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
111865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
111965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
112065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *p;
112165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
112265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
112365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
112465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
112565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        escape = ESC_k;
112665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
112765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
112865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      braced = TRUE;
112965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
113065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
113165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else braced = FALSE;
113265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
113365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[1] == CHAR_MINUS)
113465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
113565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      negated = TRUE;
113665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
113765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
113865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else negated = FALSE;
113965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
114065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* The integer range is limited by the machine's int representation. */
114165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    s = 0;
114265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    overflow = FALSE;
114365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (IS_DIGIT(ptr[1]))
114465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
114565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (s > INT_MAX / 10 - 1) /* Integer overflow */
114665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
114765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        overflow = TRUE;
114865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
114965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
115065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      s = s * 10 + (int)(*(++ptr) - CHAR_0);
115165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
115265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (overflow) /* Integer overflow */
115365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
115465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (IS_DIGIT(ptr[1]))
115565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
115665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR61;
115765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
115865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
115965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
116065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
116165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
116265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR57;
116365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
116465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
116565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
116665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (s == 0)
116765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
116865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR58;
116965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
117065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
117165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
117265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (negated)
117365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
117465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (s > bracount)
117565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
117665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR15;
117765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
117865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
117965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      s = bracount - (s - 1);
118065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
118165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
118265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    escape = -s;
118365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
118465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
118565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* The handling of escape sequences consisting of a string of digits
118665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    starting with one that is not zero is not straightforward. Perl has changed
118765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    over the years. Nowadays \g{} for backreferences and \o{} for octal are
118865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    recommended to avoid the ambiguities in the old syntax.
118965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
119065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Outside a character class, the digits are read as a decimal number. If the
119165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    number is less than 8 (used to be 10), or if there are that many previous
119265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    extracting left brackets, then it is a back reference. Otherwise, up to
119365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    three octal digits are read to form an escaped byte. Thus \123 is likely to
119465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
119565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the octal value is greater than 377, the least significant 8 bits are
119665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    taken. \8 and \9 are treated as the literal characters 8 and 9.
119765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
119865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Inside a character class, \ followed by a digit is always either a literal
119965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    8 or 9 or an octal number. */
120065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
120165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
120265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
120365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
120465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!isclass)
120565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
120665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      oldptr = ptr;
120765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* The integer range is limited by the machine's int representation. */
120865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      s = (int)(c -CHAR_0);
120965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      overflow = FALSE;
121065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (IS_DIGIT(ptr[1]))
121165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
121265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (s > INT_MAX / 10 - 1) /* Integer overflow */
121365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
121465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          overflow = TRUE;
121565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
121665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
121765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        s = s * 10 + (int)(*(++ptr) - CHAR_0);
121865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
121965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (overflow) /* Integer overflow */
122065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
122165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (IS_DIGIT(ptr[1]))
122265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
122365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR61;
122465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
122565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
122665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (s < 8 || s <= bracount)  /* Check for back reference */
122765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
122865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        escape = -s;
122965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
123065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
123165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr = oldptr;      /* Put the pointer back and fall through */
123265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
123365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
123465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle a digit following \ when the number is not a back reference. If
123565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the first digit is 8 or 9, Perl used to generate a binary zero byte and
123665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    then treat the digit as a following literal. At least by Perl 5.18 this
123765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    changed so as not to insert the binary zero. */
123865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
123965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((c = *ptr) >= CHAR_8) break;
124065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
124165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Fall through with a digit less than 8 */
124265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
124365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* \0 always starts an octal number, but we may drop through to here with a
124465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    larger first octal digit. The original code used just to take the least
124565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    significant 8 bits of octal numbers (I think this is what early Perls used
124665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
124765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    but no more than 3 octal digits. */
124865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
124965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_0:
125065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c -= CHAR_0;
125165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
125265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = c * 8 + *(++ptr) - CHAR_0;
125365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8
125465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!utf && c > 0xff) *errorcodeptr = ERR51;
125565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
125665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
125765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
125865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* \o is a relatively new Perl feature, supporting a more general way of
125965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    specifying character codes in octal. The only supported form is \o{ddd}. */
126065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
126165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_o:
126265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
126365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else
126465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
126565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr += 2;
126665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = 0;
126765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      overflow = FALSE;
126865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
126965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
127065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        register pcre_uint32 cc = *ptr++;
127165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
127265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32
127365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c >= 0x20000000l) { overflow = TRUE; break; }
127465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
127565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = (c << 3) + cc - CHAR_0 ;
127665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
127765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
127865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
127965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
128065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
128165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
128265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
128365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
128465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (overflow)
128565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
128665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
128765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR34;
128865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
128965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
129065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
129165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
129265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
129365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else *errorcodeptr = ERR80;
129465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
129565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
129665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
129765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
129865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    numbers. Otherwise it is a lowercase x letter. */
129965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
130065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_x:
130165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
130265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
130365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
130465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
130565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
130665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = 0;
130765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        for (i = 0; i < 2; ++i)
130865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
130965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          register pcre_uint32 cc = *(++ptr);
131065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC  /* ASCII/UTF-8 coding */
131165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
131265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
131365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else           /* EBCDIC coding */
131465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
131565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
131665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
131765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
131865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
131965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }    /* End JavaScript handling */
132065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
132165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle \x in Perl's style. \x{ddd} is a character number which can be
132265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
132365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    digits. If not, { used to be treated as a data character. However, Perl
132465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    seems to read hex digits up to the first non-such, and ignore the rest, so
132565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
132665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    now gives an error. */
132765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
132865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
132965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
133065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
133165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
133265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr += 2;
133365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
133465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
133565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR86;
133665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
133765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
133865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = 0;
133965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        overflow = FALSE;
134065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
134165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
134265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          register pcre_uint32 cc = *ptr++;
134365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
134465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
134565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32
134665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (c >= 0x10000000l) { overflow = TRUE; break; }
134765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
134865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
134965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC  /* ASCII/UTF-8 coding */
135065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
135165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
135265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else           /* EBCDIC coding */
135365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
135465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
135565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
135665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
135765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
135865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
135965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
136065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
136165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
136265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
136365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
136465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
136565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
136665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (overflow)
136765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
136865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
136965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR34;
137065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
137165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
137265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
137365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
137465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
137565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
137665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
137765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the sequence of hex digits does not end with '}', give an error.
137865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        We used just to recognize this construct and fall through to the normal
137965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        \x handling, but nowadays Perl gives an error, which seems much more
138065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        sensible, so we do too. */
138165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
138265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else *errorcodeptr = ERR79;
138365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }   /* End of \x{} processing */
138465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
138565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Read a single-byte hex-defined char (up to two hex digits after \x) */
138665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
138765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
138865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
138965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = 0;
139065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
139165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
139265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uint32 cc;                          /* Some compilers don't like */
139365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          cc = *(++ptr);                           /* ++ in initializers */
139465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC  /* ASCII/UTF-8 coding */
139565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
139665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
139765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else           /* EBCDIC coding */
139865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
139965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
140065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
140165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
140265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }     /* End of \xdd handling */
140365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }       /* End of Perl-style \x handling */
140465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
140565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
140665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
140765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    An error is given if the byte following \c is not an ASCII character. This
140865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    coding is ASCII-specific, but then the whole concept of \cx is
140965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
141065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
141165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_c:
141265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *(++ptr);
141365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_NULL)
141465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
141565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR2;
141665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
141765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
141865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC    /* ASCII/UTF-8 coding */
141965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c > 127)  /* Excludes all non-ASCII in either mode */
142065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
142165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR68;
142265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
142365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
142465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c >= CHAR_a && c <= CHAR_z) c -= 32;
142565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c ^= 0x40;
142665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else             /* EBCDIC coding */
142765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c >= CHAR_a && c <= CHAR_z) c += 64;
142865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c ^= 0xC0;
142965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
143065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
143165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
143265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
143365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    other alphanumeric following \ is an error if PCRE_EXTRA was set;
143465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    otherwise, for Perl compatibility, it is a literal. This code looks a bit
143565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    odd, but there used to be some cases other than the default, and there may
143665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    be again in future, so I haven't "optimized" it. */
143765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
143865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
143965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_EXTRA) != 0) switch(c)
144065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
144165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      default:
144265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR3;
144365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
144465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
144565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
144665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
144765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
144865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
144965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Perl supports \N{name} for character names, as well as plain \N for "not
145065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnewline". PCRE does not support \N{name}. However, it does support
145165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichquantification such as \N{2,3}. */
145265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
145365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
145465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     !is_counted_repeat(ptr+2))
145565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *errorcodeptr = ERR37;
145665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
145765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If PCRE_UCP is set, we change the values for \d etc. */
145865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
145965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
146065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  escape += (ESC_DU - ESC_D);
146165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
146265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set the pointer to the final character before returning. */
146365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
146465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr;
146565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*chptr = c;
146665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn escape;
146765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
146865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
146965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
147065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
147165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
147265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
147365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*               Handle \P and \p                 *
147465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
147565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
147665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called after \P or \p has been encountered, provided that
147765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE is compiled with support for Unicode properties. On entry, ptrptr is
147865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointing at the P or p. On exit, it is pointing at the final character of the
147965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichescape sequence.
148065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
Arguments:
148265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptrptr         points to the pattern position pointer
148365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  negptr         points to a boolean that is set TRUE for negation else FALSE
148465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptypeptr       points to an unsigned int that is set to the type value
148565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pdataptr       points to an unsigned int that is set to the detailed property value
148665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr   points to the error code variable
148765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
148865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:         TRUE if the type value was found, or FALSE for an invalid type
148965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
149065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
149165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
149265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
149365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  unsigned int *pdataptr, int *errorcodeptr)
149465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
149565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c;
149665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i, bot, top;
149765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr;
149865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar name[32];
149965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
150065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichc = *(++ptr);
150165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_NULL) goto ERROR_RETURN;
150265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
150365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*negptr = FALSE;
150465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
150565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
150665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnegation. */
150765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
150865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_LEFT_CURLY_BRACKET)
150965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
151065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
151165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
151265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *negptr = TRUE;
151365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ptr++;
151465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
151565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
151665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
151765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *(++ptr);
151865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_NULL) goto ERROR_RETURN;
151965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_RIGHT_CURLY_BRACKET) break;
152065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    name[i] = c;
152165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
152265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
152365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  name[i] = 0;
152465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
152565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
152665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Otherwise there is just one following character */
152765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
152865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse
152965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
153065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  name[0] = c;
153165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  name[1] = 0;
153265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
153365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
153465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr;
153565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
153665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Search for a recognized property name using binary chop */
153765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
153865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbot = 0;
153965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtop = PRIV(utt_size);
154065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
154165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (bot < top)
154265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
154365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int r;
154465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  i = (bot + top) >> 1;
154565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
154665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (r == 0)
154765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
154865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *ptypeptr = PRIV(utt)[i].type;
154965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *pdataptr = PRIV(utt)[i].value;
155065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return TRUE;
155165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
155265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (r > 0) bot = i + 1; else top = i;
155365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
155465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
155565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorcodeptr = ERR47;
155665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr;
155765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE;
155865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
155965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichERROR_RETURN:
156065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorcodeptr = ERR46;
156165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr;
156265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE;
156365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
156465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
156565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
156665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
156765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
156865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
156965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*         Read repeat counts                     *
157065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
157165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
157265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Read an item of the form {n,m} and return the values. This is called only
157365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichafter is_counted_repeat() has confirmed that a repeat-count quantifier exists,
157465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso the syntax is guaranteed to be correct, but we need to check the values.
157565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
157665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
157765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p              pointer to first char after '{'
157865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  minp           pointer to int for min
157965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  maxp           pointer to int for max
158065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 returned as -1 if no max
158165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr   points to error code variable
158265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
158365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:         pointer to '}' on success;
158465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 current ptr on error, with errorcodeptr set non-zero
158565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
158665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
158765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *
158865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichread_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
158965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
159065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint min = 0;
159165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint max = -1;
159265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
159365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p))
159465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
159565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  min = min * 10 + (int)(*p++ - CHAR_0);
159665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (min > 65535)
159765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
159865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *errorcodeptr = ERR5;
159965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return p;
160065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
160165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
160265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
160365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
160465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
160565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
160665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
160765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    max = 0;
160865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while(IS_DIGIT(*p))
160965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
161065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      max = max * 10 + (int)(*p++ - CHAR_0);
161165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (max > 65535)
161265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
161365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR5;
161465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        return p;
161565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
161665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
161765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (max < min)
161865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
161965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR4;
162065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return p;
162165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
162265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
162365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
162465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
162565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*minp = min;
162665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*maxp = max;
162765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn p;
162865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
162965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
163065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
163165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
163265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
163365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*      Find first significant op code            *
163465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
163565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
163665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is called by several functions that scan a compiled expression looking
163765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor a fixed first character, or an anchoring op code etc. It skips over things
163865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthat do not influence this. For some calls, it makes sense to skip negative
163965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichforward and all backward assertions, and also the \b assertion; for others it
164065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdoes not.
164165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
164265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
164365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code         pointer to the start of the group
164465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  skipassert   TRUE if certain assertions are to be skipped
164565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
164665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:       pointer to the first significant opcode
164765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
164865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
164965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar*
165065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirst_significant_code(const pcre_uchar *code, BOOL skipassert)
165165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
165265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
165365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
165465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch ((int)*code)
165565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
165665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERT_NOT:
165765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERTBACK:
165865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERTBACK_NOT:
165965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!skipassert) return code;
166065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do code += GET(code, 1); while (*code == OP_ALT);
166165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[*code];
166265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
166365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
166465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WORD_BOUNDARY:
166565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WORD_BOUNDARY:
166665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!skipassert) return code;
166765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Fall through */
166865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
166965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CALLOUT:
167065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CREF:
167165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNCREF:
167265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_RREF:
167365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNRREF:
167465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DEF:
167565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[*code];
167665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
167765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
167865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
167965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return code;
168065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
168165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
168265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here */
168365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
168465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
168565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
168665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
168765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
168865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Find the fixed length of a branch       *
168965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
169065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
169165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Scan a branch and compute the fixed length of subject that will match it,
169265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif the length is fixed. This is needed for dealing with backward assertions.
169365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIn UTF8 mode, the result is in characters rather than bytes. The branch is
169465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtemporarily terminated with OP_END when this function is called.
169565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
169665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is called when a backward assertion is encountered, so that if it
169765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfails, the error message can point to the correct place in the pattern.
169865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, we cannot do this when the assertion contains subroutine calls,
169965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they can be forward references. We solve this by remembering this case
170065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand doing the check at the end; a flag specifies which mode we are running in.
170165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
170265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
170365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code     points to the start of the pattern (the bracket)
170465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
170565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  atend    TRUE if called when the pattern is complete
170665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd       the "compile data" structure
170765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
170865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:   the fixed length,
170965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             or -1 if there is no fixed length,
171065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             or -2 if \C was encountered (in UTF-8 mode only)
171165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             or -3 if an OP_RECURSE item was encountered and atend is FALSE
171265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             or -4 if an unknown opcode was encountered (internal error)
171365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
171465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
171565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
171665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
171765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
171865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = -1;
171965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
172065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister int branchlength = 0;
172165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar *cc = code + 1 + LINK_SIZE;
172265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
172365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Scan along the opcodes for this branch. If we get to the end of the
172465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbranch, check the length against that of the other branches. */
172565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
172665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
172765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
172865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int d;
172965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *ce, *cs;
173065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  register pcre_uchar op = *cc;
173165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
173265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch (op)
173365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
173465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* We only need to continue for OP_CBRA (normal capturing bracket) and
173565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    OP_BRA (normal non-capturing bracket) because the other variants of these
173665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    opcodes are all concerned with unlimited repeated groups, which of course
173765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    are not of fixed length. */
173865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
173965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CBRA:
174065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRA:
174165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ONCE:
174265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ONCE_NC:
174365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_COND:
174465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
174565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (d < 0) return d;
174665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength += d;
174765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do cc += GET(cc, 1); while (*cc == OP_ALT);
174865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 1 + LINK_SIZE;
174965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
175065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
175165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Reached end of a branch; if it's a ket it is the end of a nested call.
175265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
175365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    an ALT. If it is END it's the end of the outer call. All can be handled by
175465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the same code. Note that we must not include the OP_KETRxxx opcodes here,
175565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    because they all imply an unlimited repeat. */
175665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
175765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ALT:
175865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KET:
175965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_END:
176065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ACCEPT:
176165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERT_ACCEPT:
176265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (length < 0) length = branchlength;
176365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (length != branchlength) return -1;
176465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*cc != OP_ALT) return length;
176565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 1 + LINK_SIZE;
176665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength = 0;
176765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
176865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
176965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* A true recursion implies not fixed length, but a subroutine call may
177065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    be OK. If the subroutine is a forward reference, we can't deal with
177165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    it until the end of the pattern, so return -3. */
177265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
177365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_RECURSE:
177465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!atend) return -3;
177565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
177665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
177765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (cc > cs && cc < ce) return -1;                    /* Recursion */
177865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
177965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (d < 0) return d;
178065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength += d;
178165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 1 + LINK_SIZE;
178265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
178365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
178465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Skip over assertive subpatterns */
178565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
178665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERT:
178765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERT_NOT:
178865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERTBACK:
178965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ASSERTBACK_NOT:
179065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do cc += GET(cc, 1); while (*cc == OP_ALT);
179165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += PRIV(OP_lengths)[*cc];
179265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
179365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
179465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Skip over things that don't match chars */
179565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
179665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MARK:
179765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PRUNE_ARG:
179865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SKIP_ARG:
179965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_THEN_ARG:
180065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += cc[1] + PRIV(OP_lengths)[*cc];
180165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
180265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
180365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CALLOUT:
180465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CIRC:
180565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CIRCM:
180665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CLOSE:
180765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_COMMIT:
180865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CREF:
180965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DEF:
181065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNCREF:
181165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNRREF:
181265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DOLL:
181365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DOLLM:
181465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EOD:
181565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EODN:
181665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_FAIL:
181765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WORD_BOUNDARY:
181865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PRUNE:
181965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_REVERSE:
182065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_RREF:
182165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SET_SOM:
182265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SKIP:
182365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SOD:
182465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SOM:
182565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_THEN:
182665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WORD_BOUNDARY:
182765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += PRIV(OP_lengths)[*cc];
182865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
182965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
183065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle literal characters */
183165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
183265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHAR:
183365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHARI:
183465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT:
183565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTI:
183665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength++;
183765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 2;
183865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
183965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
184065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
184165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
184265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
184365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle exact repetitions. The count is already in characters, but we
184465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    need to skip over a multibyte character in UTF8 mode.  */
184565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
184665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACT:
184765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACTI:
184865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACT:
184965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACTI:
185065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength += (int)GET2(cc,1);
185165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 2 + IMM2_SIZE;
185265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
185365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
185465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
185565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
185665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
185765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEEXACT:
185865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength += GET2(cc,1);
185965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
186065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cc += 2;
186165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 1 + IMM2_SIZE + 1;
186265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
186365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
186465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle single-char matchers */
186565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
186665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PROP:
186765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPROP:
186865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 2;
186965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Fall through */
187065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
187165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_HSPACE:
187265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_VSPACE:
187365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_HSPACE:
187465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_VSPACE:
187565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_DIGIT:
187665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DIGIT:
187765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WHITESPACE:
187865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WHITESPACE:
187965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WORDCHAR:
188065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WORDCHAR:
188165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANY:
188265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ALLANY:
188365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branchlength++;
188465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc++;
188565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
188665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
188765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
188865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    otherwise \C is coded as OP_ALLANY. */
188965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
189065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANYBYTE:
189165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return -2;
189265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
189365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Check a class for variable quantification */
189465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
189565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CLASS:
189665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NCLASS:
189765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
189865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_XCLASS:
189965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* The original code caused an unsigned overflow in 64 bit systems,
190065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    so now we use a conditional statement. */
190165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (op == OP_XCLASS)
190265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cc += GET(cc, 1);
190365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
190465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cc += PRIV(OP_lengths)[OP_CLASS];
190565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
190665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += PRIV(OP_lengths)[OP_CLASS];
190765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
190865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
190965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch (*cc)
191065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
191165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRSTAR:
191265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINSTAR:
191365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPLUS:
191465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINPLUS:
191565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRQUERY:
191665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINQUERY:
191765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSSTAR:
191865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSPLUS:
191965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSQUERY:
192065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return -1;
192165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
192265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRRANGE:
192365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINRANGE:
192465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSRANGE:
192565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
192665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      branchlength += (int)GET2(cc,1);
192765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cc += 1 + 2 * IMM2_SIZE;
192865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
192965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
193065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      default:
193165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      branchlength++;
193265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
193365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
193465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
193565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Anything else is variable length */
193665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
193765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANYNL:
193865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAMINZERO:
193965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAPOS:
194065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAPOSZERO:
194165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAZERO:
194265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CBRAPOS:
194365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXTUNI:
194465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRMAX:
194565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRMIN:
194665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRPOS:
194765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUS:
194865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUSI:
194965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERY:
195065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERYI:
195165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTAR:
195265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTARI:
195365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTO:
195465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTOI:
195565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUS:
195665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUSI:
195765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERY:
195865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERYI:
195965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTAR:
196065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTARI:
196165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTO:
196265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTOI:
196365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUS:
196465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUSI:
196565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUS:
196665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUSI:
196765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERY:
196865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERYI:
196965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTAR:
197065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTARI:
197165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTO:
197265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTOI:
197365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERY:
197465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERYI:
197565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTAR:
197665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTARI:
197765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTO:
197865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTOI:
197965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUS:
198065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUSI:
198165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUS:
198265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUSI:
198365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERY:
198465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERYI:
198565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTAR:
198665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTARI:
198765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTO:
198865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTOI:
198965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERY:
199065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERYI:
199165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_REF:
199265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_REFI:
199365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNREF:
199465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DNREFI:
199565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SBRA:
199665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SBRAPOS:
199765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SCBRA:
199865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SCBRAPOS:
199965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SCOND:
200065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SKIPZERO:
200165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STAR:
200265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STARI:
200365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINPLUS:
200465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINQUERY:
200565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINSTAR:
200665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINUPTO:
200765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPLUS:
200865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSPLUS:
200965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSQUERY:
201065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSSTAR:
201165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSUPTO:
201265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEQUERY:
201365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPESTAR:
201465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEUPTO:
201565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTO:
201665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTOI:
201765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return -1;
201865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
201965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Catch unrecognized opcodes so that when new ones are added they
202065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    are not forgotten, as has happened in the past. */
202165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
202265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
202365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return -4;
202465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
202565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
202665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never gets here */
202765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
202865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
202965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
203065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
203165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
203265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Scan compiled regex for specific bracket    *
203365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
203465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
203565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This little function scans through a compiled pattern until it finds a
203665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcapturing bracket with the given number, or, if the number is negative, an
203765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinstance of OP_REVERSE for a lookbehind. The function is global in the C sense
203865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso that it can be called from pcre_study() when finding the minimum matching
203965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength.
204065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
204165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
204265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of expression
204365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
204465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  number      the required bracket number or negative to find a lookbehind
204565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
204665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      pointer to the opcode for the bracket, or NULL if not found
204765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
204865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
204965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *
205065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
205165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
205265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
205365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
205465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  register pcre_uchar c = *code;
205565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
205665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_END) return NULL;
205765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
205865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* XCLASS is used for classes that cannot be represented just by a bit
205965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  map. This includes negated single high-valued characters. The length in
206065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  the table is zero; the actual length is stored in the compiled code. */
206165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
206265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_XCLASS) code += GET(code, 1);
206365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
206465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Handle recursion */
206565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
206665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (c == OP_REVERSE)
206765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
206865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (number < 0) return (pcre_uchar *)code;
206965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
207065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
207165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
207265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Handle capturing bracket */
207365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
207465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (c == OP_CBRA || c == OP_SCBRA ||
207565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           c == OP_CBRAPOS || c == OP_SCBRAPOS)
207665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
207765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int n = (int)GET2(code, 1+LINK_SIZE);
207865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (n == number) return (pcre_uchar *)code;
207965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
208065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
208165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
208265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Otherwise, we can get the item's length from the table, except that for
208365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  repeated character types, we have to test for \p and \P, which have an extra
208465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
208565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  must add in its length. */
208665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
208765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
208865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
208965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch(c)
209065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
209165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPESTAR:
209265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINSTAR:
209365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPLUS:
209465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINPLUS:
209565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEQUERY:
209665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINQUERY:
209765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSSTAR:
209865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSPLUS:
209965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSQUERY:
210065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
210165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
210265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
210365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEUPTO:
210465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINUPTO:
210565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEEXACT:
210665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSUPTO:
210765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
210865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += 2;
210965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
211065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
211165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MARK:
211265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PRUNE_ARG:
211365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_SKIP_ARG:
211465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_THEN_ARG:
211565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += code[1];
211665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
211765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
211865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
211965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Add in the fixed length from the table */
212065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
212165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
212265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
212365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* In UTF-8 mode, opcodes that are followed by a character may be followed by
212465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  a multi-byte character. The length in the table is a minimum, so we have to
212565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  arrange to skip the extra bytes. */
212665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
212765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
212865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf) switch(c)
212965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
213065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CHAR:
213165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CHARI:
213265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EXACT:
213365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EXACTI:
213465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_UPTO:
213565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_UPTOI:
213665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINUPTO:
213765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINUPTOI:
213865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSUPTO:
213965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSUPTOI:
214065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_STAR:
214165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_STARI:
214265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINSTAR:
214365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINSTARI:
214465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSSTAR:
214565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSSTARI:
214665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PLUS:
214765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PLUSI:
214865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINPLUS:
214965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINPLUSI:
215065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSPLUS:
215165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSPLUSI:
215265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_QUERY:
215365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_QUERYI:
215465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINQUERY:
215565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINQUERYI:
215665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSQUERY:
215765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSQUERYI:
215865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
215965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
216065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
216165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
216265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (void)(utf);  /* Keep compiler happy by referencing function argument */
216365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
216465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
216565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
216665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
216765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
216865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
216965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
217065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
217165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*   Scan compiled regex for recursion reference  *
217265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
217365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
217465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This little function scans through a compiled pattern until it finds an
217565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinstance of OP_RECURSE.
217665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
217765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
217865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of expression
217965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
218065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
218165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      pointer to the opcode for OP_RECURSE, or NULL if not found
218265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
218365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
218465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *
218565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_recurse(const pcre_uchar *code, BOOL utf)
218665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
218765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
218865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
218965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  register pcre_uchar c = *code;
219065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_END) return NULL;
219165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_RECURSE) return code;
219265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
219365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* XCLASS is used for classes that cannot be represented just by a bit
219465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  map. This includes negated single high-valued characters. The length in
219565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  the table is zero; the actual length is stored in the compiled code. */
219665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
219765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_XCLASS) code += GET(code, 1);
219865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
219965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Otherwise, we can get the item's length from the table, except that for
220065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  repeated character types, we have to test for \p and \P, which have an extra
220165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
220265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  must add in its length. */
220365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
220465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
220565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
220665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch(c)
220765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
220865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPESTAR:
220965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINSTAR:
221065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPLUS:
221165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINPLUS:
221265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEQUERY:
221365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINQUERY:
221465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSSTAR:
221565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSPLUS:
221665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSQUERY:
221765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
221865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
221965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
222065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEPOSUPTO:
222165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEUPTO:
222265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEMINUPTO:
222365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_TYPEEXACT:
222465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
222565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += 2;
222665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
222765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
222865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MARK:
222965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PRUNE_ARG:
223065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_SKIP_ARG:
223165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_THEN_ARG:
223265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += code[1];
223365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
223465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
223565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
223665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Add in the fixed length from the table */
223765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
223865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
223965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
224065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In UTF-8 mode, opcodes that are followed by a character may be followed
224165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    by a multi-byte character. The length in the table is a minimum, so we have
224265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    to arrange to skip the extra bytes. */
224365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
224465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
224565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf) switch(c)
224665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
224765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CHAR:
224865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CHARI:
224965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT:
225065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTI:
225165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EXACT:
225265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EXACTI:
225365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTEXACT:
225465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTEXACTI:
225565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_UPTO:
225665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_UPTOI:
225765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTUPTO:
225865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTUPTOI:
225965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINUPTO:
226065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINUPTOI:
226165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINUPTO:
226265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINUPTOI:
226365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSUPTO:
226465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSUPTOI:
226565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSUPTO:
226665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSUPTOI:
226765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_STAR:
226865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_STARI:
226965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTSTAR:
227065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTSTARI:
227165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINSTAR:
227265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINSTARI:
227365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINSTAR:
227465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINSTARI:
227565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSSTAR:
227665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSSTARI:
227765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSSTAR:
227865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSSTARI:
227965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PLUS:
228065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PLUSI:
228165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPLUS:
228265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPLUSI:
228365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINPLUS:
228465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINPLUSI:
228565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINPLUS:
228665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINPLUSI:
228765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSPLUS:
228865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSPLUSI:
228965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSPLUS:
229065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSPLUSI:
229165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_QUERY:
229265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_QUERYI:
229365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTQUERY:
229465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTQUERYI:
229565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINQUERY:
229665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_MINQUERYI:
229765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINQUERY:
229865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTMINQUERYI:
229965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSQUERY:
230065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_POSQUERYI:
230165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSQUERY:
230265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPOSQUERYI:
230365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
230465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
230565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
230665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
230765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (void)(utf);  /* Keep compiler happy by referencing function argument */
230865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
230965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
231065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
231165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
231265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
231365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
231465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
231565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
231665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Scan compiled branch for non-emptiness      *
231765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
231865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
231965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function scans through a branch of a compiled pattern to see whether it
232065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan match the empty string or not. It is called from could_be_empty()
232165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbelow and from compile_branch() when checking for an unlimited repeat of a
232265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgroup that can match nothing. Note that first_significant_code() skips over
232365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbackward and negative forward assertions when its final argument is TRUE. If we
232465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhit an unclosed bracket, we return "empty" - this means we've struck an inner
232565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbracket whose current branch will already have been scanned.
232665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
232765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
232865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of search
232965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  endcode     points to where to stop
233065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
233165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd          contains pointers to tables etc.
233265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  recurses    chain of recurse_check to catch mutual recursion
233365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
233465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      TRUE if what is matched could be empty
233565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
233665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
233765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtypedef struct recurse_check {
233865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  struct recurse_check *prev;
233965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uchar *group;
234065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} recurse_check;
234165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
234265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
234365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcould_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
234465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL utf, compile_data *cd, recurse_check *recurses)
234565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
234665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar c;
234765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrecurse_check this_recurse;
234865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
234965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
235065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     code < endcode;
235165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
235265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
235365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uchar *ccode;
235465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
235565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c = *code;
235665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
235765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Skip over forward assertions; the other assertions are skipped by
235865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  first_significant_code() with a TRUE final argument. */
235965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
236065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_ASSERT)
236165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
236265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do code += GET(code, 1); while (*code == OP_ALT);
236365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
236465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
236565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
236665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
236765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* For a recursion/subroutine call, if its end has been reached, which
236865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  implies a backward reference subroutine call, we can scan it. If it's a
236965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  forward reference subroutine call, we can't. To detect forward reference
237065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  we have to scan up the list that is kept in the workspace. This function is
237165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  called only when doing the real compile, not during the pre-compile that
237265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  measures the size of the compiled pattern. */
237365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
237465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_RECURSE)
237565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
237665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    const pcre_uchar *scode = cd->start_code + GET(code, 1);
237765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    const pcre_uchar *endgroup = scode;
237865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    BOOL empty_branch;
237965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
238065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Test for forward reference or uncompleted reference. This is disabled
238165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    when called to scan a completed pattern by setting cd->start_workspace to
238265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    NULL. */
238365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
238465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (cd->start_workspace != NULL)
238565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
238665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *tcode;
238765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
238865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
238965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
239065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
239165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
239265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If the reference is to a completed group, we need to detect whether this
239365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    is a recursive call, as otherwise there will be an infinite loop. If it is
239465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    a recursion, just skip over it. Simple recursions are easily detected. For
239565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    mutual recursions we keep a chain on the stack. */
239665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
239765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
239865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
239965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
240065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
240165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      recurse_check *r = recurses;
240265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      for (r = recurses; r != NULL; r = r->prev)
240365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (r->group == scode) break;
240465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (r != NULL) continue;   /* Mutual recursion */
240565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
240665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
240765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Completed reference; scan the referenced group, remembering it on the
240865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    stack chain to detect mutual recursions. */
240965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
241065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    empty_branch = FALSE;
241165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    this_recurse.prev = recurses;
241265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    this_recurse.group = scode;
241365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
241465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do
241565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
241665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
241765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
241865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        empty_branch = TRUE;
241965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
242065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
242165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      scode += GET(scode, 1);
242265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
242365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (*scode == OP_ALT);
242465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
242565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!empty_branch) return FALSE;  /* All branches are non-empty */
242665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
242765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
242865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
242965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Groups with zero repeats can of course be empty; skip them. */
243065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
243165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
243265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c == OP_BRAPOSZERO)
243365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
243465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
243565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do code += GET(code, 1); while (*code == OP_ALT);
243665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
243765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
243865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
243965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
244065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* A nested group that is already marked as "could be empty" can just be
244165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  skipped. */
244265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
244365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_SBRA  || c == OP_SBRAPOS ||
244465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c == OP_SCBRA || c == OP_SCBRAPOS)
244565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
244665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do code += GET(code, 1); while (*code == OP_ALT);
244765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
244865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
244965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
245065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
245165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* For other groups, scan the branches. */
245265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
245365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_BRA  || c == OP_BRAPOS ||
245465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c == OP_CBRA || c == OP_CBRAPOS ||
245565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c == OP_ONCE || c == OP_ONCE_NC ||
245665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c == OP_COND)
245765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
245865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    BOOL empty_branch;
245965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
246065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
246165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If a conditional group has only one branch, there is a second, implied,
246265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    empty branch, so just skip over the conditional, because it could be empty.
246365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Otherwise, scan the individual branches of the group. */
246465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
246565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
246665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += GET(code, 1);
246765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
246865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
246965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      empty_branch = FALSE;
247065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
247165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
247265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
247365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          empty_branch = TRUE;
247465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += GET(code, 1);
247565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
247665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (*code == OP_ALT);
247765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (!empty_branch) return FALSE;   /* All branches are non-empty */
247865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
247965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
248065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
248165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
248265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
248365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
248465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Handle the other opcodes */
248565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
248665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch (c)
248765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
248865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Check for quantifiers after a class. XCLASS is used for classes that
248965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cannot be represented just by a bit map. This includes negated single
249065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
249165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    actual length is stored in the compiled code, so we must update "code"
249265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    here. */
249365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
249465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
249565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_XCLASS:
249665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ccode = code += GET(code, 1);
249765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto CHECK_CLASS_REPEAT;
249865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
249965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
250065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CLASS:
250165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NCLASS:
250265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ccode = code + PRIV(OP_lengths)[OP_CLASS];
250365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
250465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
250565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    CHECK_CLASS_REPEAT:
250665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
250765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
250865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch (*ccode)
250965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
251065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRSTAR:            /* These could be empty; continue */
251165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINSTAR:
251265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRQUERY:
251365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINQUERY:
251465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSSTAR:
251565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSQUERY:
251665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
251765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
251865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      default:                   /* Non-repeat => class must match */
251965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPLUS:            /* These repeats aren't empty */
252065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINPLUS:
252165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSPLUS:
252265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return FALSE;
252365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
252465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRRANGE:
252565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRMINRANGE:
252665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CRPOSRANGE:
252765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (GET2(ccode, 1) > 0) return FALSE;  /* Minimum > 0 */
252865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
252965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
253065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
253165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
253265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Opcodes that must match a character */
253365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
253465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANY:
253565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ALLANY:
253665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANYBYTE:
253765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
253865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PROP:
253965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPROP:
254065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ANYNL:
254165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
254265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_HSPACE:
254365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_HSPACE:
254465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_VSPACE:
254565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_VSPACE:
254665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXTUNI:
254765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
254865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_DIGIT:
254965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_DIGIT:
255065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WHITESPACE:
255165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WHITESPACE:
255265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT_WORDCHAR:
255365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_WORDCHAR:
255465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
255565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHAR:
255665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHARI:
255765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT:
255865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTI:
255965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
256065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUS:
256165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUSI:
256265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUS:
256365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUSI:
256465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
256565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUS:
256665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUSI:
256765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUS:
256865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUSI:
256965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
257065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUS:
257165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUSI:
257265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUS:
257365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUSI:
257465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
257565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACT:
257665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACTI:
257765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACT:
257865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACTI:
257965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
258065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPLUS:
258165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINPLUS:
258265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSPLUS:
258365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEEXACT:
258465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
258565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return FALSE;
258665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
258765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* These are going to continue, as they may be empty, but we have to
258865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    fudge the length for the \p and \P cases. */
258965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
259065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPESTAR:
259165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINSTAR:
259265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSSTAR:
259365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEQUERY:
259465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINQUERY:
259565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSQUERY:
259665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
259765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
259865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
259965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Same for these */
260065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
260165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEUPTO:
260265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINUPTO:
260365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSUPTO:
260465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
260565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += 2;
260665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
260765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
260865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* End of branch */
260965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
261065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KET:
261165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRMAX:
261265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRMIN:
261365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRPOS:
261465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ALT:
261565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return TRUE;
261665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
261765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
261865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    MINUPTO, and POSUPTO and their caseless and negative versions may be
261965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    followed by a multibyte character. */
262065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
262165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
262265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STAR:
262365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STARI:
262465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTAR:
262565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTARI:
262665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
262765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTAR:
262865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTARI:
262965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTAR:
263065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTARI:
263165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
263265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTAR:
263365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTARI:
263465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTAR:
263565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTARI:
263665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
263765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERY:
263865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERYI:
263965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERY:
264065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERYI:
264165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
264265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERY:
264365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERYI:
264465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERY:
264565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERYI:
264665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
264765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERY:
264865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERYI:
264965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERY:
265065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERYI:
265165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
265265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
265365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
265465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
265565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTO:
265665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTOI:
265765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTO:
265865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTOI:
265965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
266065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTO:
266165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTOI:
266265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTO:
266365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTOI:
266465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
266565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTO:
266665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTOI:
266765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTO:
266865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTOI:
266965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
267065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
267165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
267265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
267365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
267465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument
267565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    string. */
267665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
267765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MARK:
267865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PRUNE_ARG:
267965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SKIP_ARG:
268065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_THEN_ARG:
268165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += code[1];
268265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
268365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
268465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* None of the remaining opcodes are required to match a character. */
268565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
268665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
268765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
268865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
268965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
269065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
269165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE;
269265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
269365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
269465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
269565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
269665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
269765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Scan compiled regex for non-emptiness       *
269865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
269965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
270065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called to check for left recursive calls. We want to check
270165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe current branch of the current pattern to see if it could match the empty
270265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstring. If it could, we must look outwards for branches at other levels,
270365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstopping when we pass beyond the bracket which is the subject of the recursion.
270465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is called only during the real compile, not during the
270565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpre-compile.
270665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
270765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
270865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of the recursion
270965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  endcode     points to where to stop (current RECURSE item)
271065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bcptr       points to the chain of current (unclosed) branch starts
271165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
271265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd          pointers to tables etc
271365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
271465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      TRUE if what is matched could be empty
271565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
271665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
271765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
271865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcould_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
271965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  branch_chain *bcptr, BOOL utf, compile_data *cd)
272065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
272165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (bcptr != NULL && bcptr->current_branch >= code)
272265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
272365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
272465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return FALSE;
272565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bcptr = bcptr->outer;
272665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
272765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE;
272865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
272965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
273065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
273165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
273265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
273365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Base opcode of repeated opcodes         *
273465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
273565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
273665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Returns the base opcode for repeated single character type opcodes. If the
273765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopcode is not a repeated character type, it returns with the original value.
273865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
273965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:  c opcode
274065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:    base opcode for the type
274165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
274265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
274365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uchar
274465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_repeat_base(pcre_uchar c)
274565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
274665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (c > OP_TYPEPOSUPTO)? c :
274765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       (c >= OP_TYPESTAR)?   OP_TYPESTAR :
274865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       (c >= OP_NOTSTARI)?   OP_NOTSTARI :
274965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       (c >= OP_NOTSTAR)?    OP_NOTSTAR :
275065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       (c >= OP_STARI)?      OP_STARI :
275165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                             OP_STAR;
275265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
275365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
275465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
275565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
#ifdef SUPPORT_UCP
/*************************************************
*        Check a character and a property        *
*************************************************/

/* This function is called by check_auto_possessive() when a property item
is adjacent to a fixed character. For each recognised property type it works
out whether the character has the property and compares that with "negated",
so the result is TRUE when (character has property) == negated. An
unrecognised property type gives FALSE.

Arguments:
  c            the character
  ptype        the property type
  pdata        the data for the type
  negated      TRUE if it's a negated property (\P or \p{^)

Returns:       TRUE if auto-possessifying is OK
*/

static BOOL
check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata,
  BOOL negated)
{
const ucd_record *prop = GET_UCD(c);
const pcre_uint32 *p;
BOOL has_prop;

switch(ptype)
  {
  case PT_LAMP:
  has_prop = prop->chartype == ucp_Lu ||
             prop->chartype == ucp_Ll ||
             prop->chartype == ucp_Lt;
  break;

  case PT_GC:
  has_prop = PRIV(ucp_gentype)[prop->chartype] == pdata;
  break;

  case PT_PC:
  has_prop = prop->chartype == pdata;
  break;

  case PT_SC:
  has_prop = prop->script == pdata;
  break;

  /* These are specials */

  case PT_ALNUM:
  has_prop = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
             PRIV(ucp_gentype)[prop->chartype] == ucp_N;
  break;

  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
  means that Perl space and POSIX space are now identical. PCRE was changed
  at release 8.34. */

  case PT_SPACE:    /* Perl space */
  case PT_PXSPACE:  /* POSIX space */
  switch(c)
    {
    HSPACE_CASES:
    VSPACE_CASES:
    return negated;   /* Explicit horizontal/vertical space character */

    default:
    has_prop = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
    break;
    }
  break;

  case PT_WORD:
  has_prop = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
             c == CHAR_UNDERSCORE;
  break;

  /* Scan the caseless set selected by the character's own record. The scan
  ends when an entry greater than c is seen. NOTE(review): this presumably
  relies on each set being in ascending order and ending with a value larger
  than any character - confirm against the table generator. */

  case PT_CLIST:
  for (p = PRIV(ucd_caseless_sets) + prop->caseset; c >= *p; p++)
    {
    if (c == *p) return negated;    /* Character is in the set */
    }
  return !negated;                  /* Passed the place where c would be */

  /* Any other property type */

  default:
  return FALSE;
  }

return has_prop == negated;
}
#endif  /* SUPPORT_UCP */
283765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
283865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
283965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
284065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
284165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Fill the character property list        *
284265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
284365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
284465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Checks whether the code points to an opcode that can take part in auto-
284565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpossessification, and if so, fills a list with its properties.
284665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
284765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
284865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of expression
284965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
285065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  fcc         points to case-flipping table
285165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list        points to output list
285265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              list[0] will be filled with the opcode
285365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              list[1] will be non-zero if this opcode
285465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                can match an empty character string
285565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              list[2..7] depends on the opcode
285665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
285765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      points to the start of the next opcode if *code is accepted
285865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              NULL if *code is not accepted
285965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
286065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
286165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *
286265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_chr_property_list(const pcre_uchar *code, BOOL utf,
286365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uint8 *fcc, pcre_uint32 *list)
286465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
286565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c = *code;
286665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar base;
286765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *end;
286865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 chr;
286965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
287065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
287165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 *clist_dest;
287265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *clist_src;
287365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
287465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichutf = utf;  /* Suppress "unused parameter" compiler warning */
287565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
287665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
287765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist[0] = c;
287865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist[1] = FALSE;
287965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode++;
288065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
288165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
288265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
288365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  base = get_repeat_base(c);
288465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c -= (base - OP_STAR);
288565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
288665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
288765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += IMM2_SIZE;
288865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
288965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && c != OP_POSPLUS);
289065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
289165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(base)
289265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
289365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STAR:
289465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[0] = OP_CHAR;
289565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
289665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
289765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STARI:
289865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[0] = OP_CHARI;
289965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
290065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
290165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTAR:
290265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[0] = OP_NOT;
290365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
290465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
290565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTARI:
290665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[0] = OP_NOTI;
290765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
290865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
290965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPESTAR:
291065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[0] = *code;
291165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code++;
291265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
291365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
291465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c = list[0];
291565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
291665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
291765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichswitch(c)
291865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
291965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT_DIGIT:
292065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_DIGIT:
292165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT_WHITESPACE:
292265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_WHITESPACE:
292365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT_WORDCHAR:
292465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_WORDCHAR:
292565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_ANY:
292665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_ALLANY:
292765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_ANYNL:
292865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT_HSPACE:
292965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_HSPACE:
293065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT_VSPACE:
293165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_VSPACE:
293265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_EXTUNI:
293365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_EODN:
293465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_EOD:
293565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_DOLL:
293665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_DOLLM:
293765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return code;
293865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
293965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_CHAR:
294065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOT:
294165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  GETCHARINCTEST(chr, code);
294265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[2] = chr;
294365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[3] = NOTACHAR;
294465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return code;
294565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
294665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_CHARI:
294765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOTI:
294865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
294965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  GETCHARINCTEST(chr, code);
295065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[2] = chr;
295165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
295265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
295365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (chr < 128 || (chr < 256 && !utf))
295465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[3] = fcc[chr];
295565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
295665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[3] = UCD_OTHERCASE(chr);
295765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined SUPPORT_UTF || !defined COMPILE_PCRE8
295865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[3] = (chr < 256) ? fcc[chr] : chr;
295965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
296065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[3] = fcc[chr];
296165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
296265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
296365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* The othercase might be the same value. */
296465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
296565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (chr == list[3])
296665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[3] = NOTACHAR;
296765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
296865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[4] = NOTACHAR;
296965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return code;
297065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
297165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
297265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_PROP:
297365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NOTPROP:
297465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (code[0] != PT_CLIST)
297565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
297665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[2] = code[0];
297765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[3] = code[1];
297865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return code + 2;
297965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
298065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
298165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Convert only if we have enough space. */
298265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
298365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  clist_src = PRIV(ucd_caseless_sets) + code[1];
298465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  clist_dest = list + 2;
298565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code += 2;
298665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
298765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  do {
298865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (clist_dest >= list + 8)
298965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       {
299065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       /* Early return if there is not enough space. This should never
299165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       happen, since all clists are shorter than 5 character now. */
299265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       list[2] = code[0];
299365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       list[3] = code[1];
299465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return code;
299565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       }
299665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     *clist_dest++ = *clist_src;
299765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
299865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while(*clist_src++ != NOTACHAR);
299965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
300065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* All characters are stored. The terminating NOTACHAR
300165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  is copied form the clist itself. */
300265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
300365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
300465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return code;
300565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
300665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
300765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_NCLASS:
300865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_CLASS:
300965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
301065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case OP_XCLASS:
301165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_XCLASS)
301265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end = code + GET(code, 0) - 1;
301365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
301465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
301565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end = code + 32 / sizeof(pcre_uchar);
301665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
301765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(*end)
301865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
301965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRSTAR:
302065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRMINSTAR:
302165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRQUERY:
302265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRMINQUERY:
302365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRPOSSTAR:
302465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRPOSQUERY:
302565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[1] = TRUE;
302665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end++;
302765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
302865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
302965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRPLUS:
303065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRMINPLUS:
303165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRPOSPLUS:
303265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end++;
303365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
303465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
303565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRRANGE:
303665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRMINRANGE:
303765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CRPOSRANGE:
303865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[1] = (GET2(end, 1) == 0);
303965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end += 1 + 2 * IMM2_SIZE;
304065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
304165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
304265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  list[2] = (pcre_uint32)(end - code);
304365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return end;
304465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
304565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn NULL;    /* Opcode not accepted */
304665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
304765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
304865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
304965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
305065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
305165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Scan further character sets for match       *
305265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
305365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
305465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Checks whether the base and the current opcode have a common character, in
305565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhich case the base cannot be possessified.
305665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
305765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
305865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to the byte code
305965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
306065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd          static compile data
  base_list   the data list of the base opcode
  base_end    the code pointer at the end of the base opcode (for a class,
                its data is located at base_end - base_list[2])
306265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
306365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      TRUE if the auto-possessification is possible
306465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
306565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
306665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
306765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
306865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uint32 *base_list, const pcre_uchar *base_end)
306965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
307065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c;
307165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 list[8];
307265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *chr_ptr;
307365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *ochr_ptr;
307465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *list_ptr;
307565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *next_code;
307665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
307765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *xclass_flags;
307865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
307965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint8 *class_bitset;
308065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint8 *set1, *set2, *set_end;
308165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 chr;
308265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL accepted, invert_bits;
308365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL entered_a_group = FALSE;
308465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
308565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Note: the base_list[1] contains whether the current opcode has greedy
308665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(represented by a non-zero value) quantifier. This is a different from
308765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichother character type lists, which stores here that the character iterator
308865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatches to an empty string (also represented by a non-zero value). */
308965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
309065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor(;;)
309165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
309265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* All operations move the code pointer forward.
309365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  Therefore infinite recursions are not possible. */
309465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
309565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c = *code;
309665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
309765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Skip over callouts */
309865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
309965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_CALLOUT)
310065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
310165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
310265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
310365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
310465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
310565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == OP_ALT)
310665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
310765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do code += GET(code, 1); while (*code == OP_ALT);
310865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
310965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
311065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
311165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(c)
311265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
311365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_END:
311465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KETRPOS:
311565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* TRUE only in greedy case. The non-greedy case could be replaced by
311665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
311765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    uses more memory, which we cannot get at this stage.) */
311865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
311965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return base_list[1] != 0;
312065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
312165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_KET:
312265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If the bracket is capturing, and referenced by an OP_RECURSE, or
312365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
312465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cannot be converted to a possessive form. */
312565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
312665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (base_list[1] == 0) return FALSE;
312765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
312865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch(*(code - GET(code, 1)))
312965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
313065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ASSERT:
313165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ASSERT_NOT:
313265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ASSERTBACK:
313365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ASSERTBACK_NOT:
313465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ONCE:
313565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ONCE_NC:
313665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Atomic sub-patterns and assertions can always auto-possessify their
313765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      last iterator. However, if the group was entered as a result of checking
313865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      a previous iterator, this is not possible. */
313965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
314065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return !entered_a_group;
314165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
314265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
314365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
314465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
314565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
314665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ONCE:
314765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_ONCE_NC:
314865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRA:
314965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CBRA:
315065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    next_code = code + GET(code, 1);
315165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
315265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
315365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (*next_code == OP_ALT)
315465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
315565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
315665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code = next_code + 1 + LINK_SIZE;
315765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      next_code += GET(next_code, 1);
315865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
315965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
316065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    entered_a_group = TRUE;
316165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
316265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
316365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAZERO:
316465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_BRAMINZERO:
316565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
316665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    next_code = code + 1;
316765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*next_code != OP_BRA && *next_code != OP_CBRA
316865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
316965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
317065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
317165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
317265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* The bracket content will be checked by the
317365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    OP_BRA/OP_CBRA case above. */
317465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    next_code += 1 + LINK_SIZE;
317565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
317665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return FALSE;
317765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
317865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += PRIV(OP_lengths)[c];
317965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
318065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
318165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
318265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
318365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
318465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
318565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Check for a supported opcode, and load its properties. */
318665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
318765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code = get_chr_property_list(code, utf, cd->fcc, list);
318865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (code == NULL) return FALSE;    /* Unsupported */
318965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
319065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* If either opcode is a small character list, set pointers for comparing
319165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  characters from that list with another list, or with a property. */
319265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
319365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (base_list[0] == OP_CHAR)
319465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
319565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    chr_ptr = base_list + 2;
319665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list_ptr = list;
319765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
319865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (list[0] == OP_CHAR)
319965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
320065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    chr_ptr = list + 2;
320165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list_ptr = base_list;
320265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
320365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
320465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Character bitsets can also be compared to certain opcodes. */
320565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
320665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
320765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8
320865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
320965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
321065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
321165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      )
321265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
321365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8
321465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
321565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
321665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (base_list[0] == OP_CLASS)
321765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
321865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
321965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set1 = (pcre_uint8 *)(base_end - base_list[2]);
322065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      list_ptr = list;
322165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
322265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
322365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
322465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set1 = (pcre_uint8 *)(code - list[2]);
322565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      list_ptr = base_list;
322665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
322765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
322865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    invert_bits = FALSE;
322965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch(list_ptr[0])
323065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
323165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CLASS:
323265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NCLASS:
323365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set2 = (pcre_uint8 *)
323465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ((list_ptr == list ? code : base_end) - list_ptr[2]);
323565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
323665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
323765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
323865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_XCLASS:
323965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
324065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
324165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((*xclass_flags & XCL_MAP) == 0)
324265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
324365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* No bits are set for characters < 256. */
324465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (list[1] == 0) return TRUE;
324565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Might be an empty repeat. */
324665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;
324765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
324865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set2 = (pcre_uint8 *)(xclass_flags + 1);
324965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
325065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
325165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
325265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_DIGIT:
325365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      invert_bits = TRUE;
325465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Fall through */
325565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_DIGIT:
325665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set2 = (pcre_uint8 *)(cd->cbits + cbit_digit);
325765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
325865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
325965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_WHITESPACE:
326065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      invert_bits = TRUE;
326165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Fall through */
326265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_WHITESPACE:
326365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set2 = (pcre_uint8 *)(cd->cbits + cbit_space);
326465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
326565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
326665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_WORDCHAR:
326765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      invert_bits = TRUE;
326865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Fall through */
326965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_WORDCHAR:
327065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set2 = (pcre_uint8 *)(cd->cbits + cbit_word);
327165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
327265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
327365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      default:
327465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return FALSE;
327565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
327665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
327765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Because the sets are unaligned, we need
327865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    to perform byte comparison here. */
327965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    set_end = set1 + 32;
328065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (invert_bits)
328165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
328265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
328365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
328465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((*set1++ & ~(*set2++)) != 0) return FALSE;
328565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
328665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (set1 < set_end);
328765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
328865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
328965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
329065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
329165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
329265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((*set1++ & *set2++) != 0) return FALSE;
329365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
329465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (set1 < set_end);
329565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
329665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
329765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (list[1] == 0) return TRUE;
329865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Might be an empty repeat. */
329965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
330065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
330165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
330265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Some property combinations also acceptable. Unicode property opcodes are
330365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  processed specially; the rest can be handled with a lookup table. */
330465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
330565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
330665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
330765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_uint32 leftop, rightop;
330865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
330965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    leftop = base_list[0];
331065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    rightop = list[0];
331165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
331265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
331365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    accepted = FALSE; /* Always set in non-unicode case. */
331465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (leftop == OP_PROP || leftop == OP_NOTPROP)
331565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
331665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (rightop == OP_EOD)
331765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        accepted = TRUE;
331865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (rightop == OP_PROP || rightop == OP_NOTPROP)
331965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
332065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        int n;
332165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        const pcre_uint8 *p;
332265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL same = leftop == rightop;
332365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL lisprop = leftop == OP_PROP;
332465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL risprop = rightop == OP_PROP;
332565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL bothprop = lisprop && risprop;
332665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
332765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* There's a table that specifies how each combination is to be
332865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        processed:
332965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          0   Always return FALSE (never auto-possessify)
333065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          1   Character groups are distinct (possessify if both are OP_PROP)
333165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          2   Check character categories in the same group (general or particular)
333265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          3   Return TRUE if the two opcodes are not the same
333365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ... see comments below
333465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        */
333565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
333665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        n = propposstab[base_list[2]][list[2]];
333765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        switch(n)
333865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
333965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 0: break;
334065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 1: accepted = bothprop; break;
334165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 2: accepted = (base_list[3] == list[3]) != same; break;
334265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 3: accepted = !same; break;
334365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
334465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 4:  /* Left general category, right particular category */
334565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = risprop && catposstab[base_list[3]][list[3]] == same;
334665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
334765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
334865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 5:  /* Right general category, left particular category */
334965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
335065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
335165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
335265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* This code is logically tricky. Think hard before fiddling with it.
335365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          The posspropstab table has four entries per row. Each row relates to
335465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          one of PCRE's special properties such as ALNUM or SPACE or WORD.
335565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          Only WORD actually needs all four entries, but using repeats for the
335665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          others means they can all use the same code below.
335765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
335865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          The first two entries in each row are Unicode general categories, and
335965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          apply always, because all the characters they include are part of the
336065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PCRE character set. The third and fourth entries are a general and a
336165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          particular category, respectively, that include one or more relevant
336265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          characters. One or the other is used, depending on whether the check
336365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          is for a general or a particular category. However, in both cases the
336465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          category contains more characters than the specials that are defined
336565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for the property being tested against. Therefore, it cannot be used
336665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          in a NOTPROP case.
336765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
336865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
336965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          Underscore is covered by ucp_P or ucp_Po. */
337065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
337165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 6:  /* Left alphanum vs right general category */
337265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 7:  /* Left space vs right general category */
337365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 8:  /* Left word vs right general category */
337465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          p = posspropstab[n-6];
337565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = risprop && lisprop ==
337665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (list[3] != p[0] &&
337765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             list[3] != p[1] &&
337865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (list[3] != p[2] || !lisprop));
337965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
338065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
338165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 9:   /* Right alphanum vs left general category */
338265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 10:  /* Right space vs left general category */
338365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 11:  /* Right word vs left general category */
338465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          p = posspropstab[n-9];
338565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = lisprop && risprop ==
338665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (base_list[3] != p[0] &&
338765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             base_list[3] != p[1] &&
338865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (base_list[3] != p[2] || !risprop));
338965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
339065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
339165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 12:  /* Left alphanum vs right particular category */
339265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 13:  /* Left space vs right particular category */
339365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 14:  /* Left word vs right particular category */
339465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          p = posspropstab[n-12];
339565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = risprop && lisprop ==
339665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (catposstab[p[0]][list[3]] &&
339765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             catposstab[p[1]][list[3]] &&
339865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (list[3] != p[3] || !lisprop));
339965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
340065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
340165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 15:  /* Right alphanum vs left particular category */
340265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 16:  /* Right space vs left particular category */
340365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case 17:  /* Right word vs left particular category */
340465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          p = posspropstab[n-15];
340565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          accepted = lisprop && risprop ==
340665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (catposstab[p[0]][base_list[3]] &&
340765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             catposstab[p[1]][base_list[3]] &&
340865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (base_list[3] != p[3] || !risprop));
340965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
341065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
341165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
341265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
341365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
341465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
341565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif  /* SUPPORT_UCP */
341665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
341765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
341865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
341965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
342065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
342165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!accepted) return FALSE;
342265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
342365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (list[1] == 0) return TRUE;
342465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Might be an empty repeat. */
342565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
342665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
342765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
342865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Control reaches here only if one of the items is a small character list.
342965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  All characters are checked against the other side. */
343065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
343165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  do
343265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
343365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    chr = *chr_ptr;
343465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
343565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    switch(list_ptr[0])
343665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
343765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CHAR:
343865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ochr_ptr = list_ptr + 2;
343965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
344065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
344165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (chr == *ochr_ptr) return FALSE;
344265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ochr_ptr++;
344365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
344465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while(*ochr_ptr != NOTACHAR);
344565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
344665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
344765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT:
344865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ochr_ptr = list_ptr + 2;
344965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
345065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
345165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (chr == *ochr_ptr)
345265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
345365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ochr_ptr++;
345465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
345565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while(*ochr_ptr != NOTACHAR);
345665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
345765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
345865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
345965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not*
346065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
346165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
346265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_DIGIT:
346365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE;
346465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
346565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
346665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_DIGIT:
346765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE;
346865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
346965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
347065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_WHITESPACE:
347165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE;
347265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
347365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
347465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_WHITESPACE:
347565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE;
347665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
347765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
347865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_WORDCHAR:
347965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE;
348065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
348165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
348265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_WORDCHAR:
348365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE;
348465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
348565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
348665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_HSPACE:
348765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(chr)
348865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
348965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        HSPACE_CASES: return FALSE;
349065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default: break;
349165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
349265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
349365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
349465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_HSPACE:
349565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(chr)
349665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
349765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        HSPACE_CASES: break;
349865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default: return FALSE;
349965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
350065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
350165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
350265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_ANYNL:
350365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_VSPACE:
350465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(chr)
350565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
350665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        VSPACE_CASES: return FALSE;
350765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default: break;
350865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
350965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
351065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
351165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOT_VSPACE:
351265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(chr)
351365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
351465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        VSPACE_CASES: break;
351565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default: return FALSE;
351665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
351765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
351865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
351965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_DOLL:
352065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EODN:
352165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch (chr)
352265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
352365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_CR:
352465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_LF:
352565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_VT:
352665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_FF:
352765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_NEL:
352865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC
352965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case 0x2028:
353065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case 0x2029:
353165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif  /* Not EBCDIC */
353265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        return FALSE;
353365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
353465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
353565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
353665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_EOD:    /* Can always possessify before \z */
353765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
353865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
353965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
354065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_PROP:
354165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NOTPROP:
354265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
354365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            list_ptr[0] == OP_NOTPROP))
354465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        return FALSE;
354565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
354665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
354765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
354865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_NCLASS:
354965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr > 255) return FALSE;
355065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Fall through */
355165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
355265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_CLASS:
355365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (chr > 255) break;
355465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      class_bitset = (pcre_uint8 *)
355565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ((list_ptr == list ? code : base_end) - list_ptr[2]);
355665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
355765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
355865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
355965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
356065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case OP_XCLASS:
356165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
356265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          list_ptr[2] + LINK_SIZE, utf)) return FALSE;
356365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
356465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
356565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
356665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      default:
356765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return FALSE;
356865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
356965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
357065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    chr_ptr++;
357165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
357265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while(*chr_ptr != NOTACHAR);
357365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
357465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* At least one character must be matched from this opcode. */
357565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
357665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (list[1] == 0) return TRUE;
357765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
357865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
358165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
358265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
358365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
358465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
358565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
358665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
358765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Scan compiled regex for auto-possession     *
358865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
358965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
359065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Replaces single character iterations with their possessive alternatives
359165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif appropriate. This function modifies the compiled opcode!
359265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
359365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
359465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code        points to start of the byte code
359565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
359665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd          static compile data
359765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
359865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:      nothing
359965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
360065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
360165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void
360265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
360365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
360465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar c;
360565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *end;
360665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *repeat_opcode;
360765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 list[8];
360865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
360965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
361065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
361165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c = *code;
361265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
361365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
361465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
361565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c -= get_repeat_base(c) - OP_STAR;
361665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end = (c <= OP_MINUPTO) ?
361765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      get_chr_property_list(code, utf, cd->fcc, list) : NULL;
361865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
361965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
362065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (end != NULL && compare_opcodes(end, utf, cd, list, end))
362165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
362265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(c)
362365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
362465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_STAR:
362565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSSTAR - OP_STAR;
362665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
362765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
362865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_MINSTAR:
362965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSSTAR - OP_MINSTAR;
363065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
363165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
363265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_PLUS:
363365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSPLUS - OP_PLUS;
363465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
363565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
363665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_MINPLUS:
363765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSPLUS - OP_MINPLUS;
363865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
363965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
364065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_QUERY:
364165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSQUERY - OP_QUERY;
364265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
364365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
364465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_MINQUERY:
364565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSQUERY - OP_MINQUERY;
364665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
364765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
364865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_UPTO:
364965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSUPTO - OP_UPTO;
365065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
365165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
365265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_MINUPTO:
365365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code += OP_POSUPTO - OP_MINUPTO;
365465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
365565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
365665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
365765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
365865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
365965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
366065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
366165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
366265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == OP_XCLASS)
366365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat_opcode = code + GET(code, 1);
366465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
366565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
366665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat_opcode = code + 1 + (32 / sizeof(pcre_uchar));
366765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
366865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *repeat_opcode;
366965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
367065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
367165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* end must not be NULL. */
367265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      end = get_chr_property_list(code, utf, cd->fcc, list);
367365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
367465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      list[1] = (c & 1) == 0;
367565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
367665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (compare_opcodes(end, utf, cd, list, end))
367765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
367865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        switch (c)
367965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
368065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRSTAR:
368165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRMINSTAR:
368265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *repeat_opcode = OP_CRPOSSTAR;
368365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
368465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
368565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRPLUS:
368665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRMINPLUS:
368765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *repeat_opcode = OP_CRPOSPLUS;
368865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
368965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
369065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRQUERY:
369165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRMINQUERY:
369265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *repeat_opcode = OP_CRPOSQUERY;
369365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
369465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
369565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRRANGE:
369665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case OP_CRMINRANGE:
369765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *repeat_opcode = OP_CRPOSRANGE;
369865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
369965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
370065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
370165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
370265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *code;
370365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
370465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
370565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(c)
370665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
370765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_END:
370865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return;
370965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
371065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPESTAR:
371165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINSTAR:
371265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPLUS:
371365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINPLUS:
371465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEQUERY:
371565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINQUERY:
371665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSSTAR:
371765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSPLUS:
371865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSQUERY:
371965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
372065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
372165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
372265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEUPTO:
372365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEMINUPTO:
372465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEEXACT:
372565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_TYPEPOSUPTO:
372665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
372765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += 2;
372865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
372965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
373065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
373165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_XCLASS:
373265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += GET(code, 1);
373365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
373465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
373565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
373665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MARK:
373765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PRUNE_ARG:
373865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_SKIP_ARG:
373965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_THEN_ARG:
374065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += code[1];
374165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
374265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
374365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
374465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Add in the fixed length from the table */
374565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
374665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code += PRIV(OP_lengths)[c];
374765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
374865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* In UTF-8 mode, opcodes that are followed by a character may be followed by
374965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  a multi-byte character. The length in the table is a minimum, so we have to
375065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  arrange to skip the extra bytes. */
375165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
375265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
375365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (utf) switch(c)
375465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
375565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHAR:
375665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_CHARI:
375765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOT:
375865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTI:
375965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STAR:
376065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTAR:
376165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUS:
376265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUS:
376365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERY:
376465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERY:
376565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTO:
376665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTO:
376765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACT:
376865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTAR:
376965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUS:
377065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERY:
377165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTO:
377265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_STARI:
377365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINSTARI:
377465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_PLUSI:
377565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINPLUSI:
377665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_QUERYI:
377765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINQUERYI:
377865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_UPTOI:
377965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_MINUPTOI:
378065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_EXACTI:
378165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSSTARI:
378265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSPLUSI:
378365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSQUERYI:
378465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_POSUPTOI:
378565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTAR:
378665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTAR:
378765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUS:
378865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUS:
378965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERY:
379065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERY:
379165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTO:
379265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTO:
379365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACT:
379465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTAR:
379565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUS:
379665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERY:
379765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTO:
379865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTSTARI:
379965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINSTARI:
380065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPLUSI:
380165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINPLUSI:
380265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTQUERYI:
380365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINQUERYI:
380465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTUPTOI:
380565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTMINUPTOI:
380665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTEXACTI:
380765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSSTARI:
380865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSPLUSI:
380965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSQUERYI:
381065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case OP_NOTPOSUPTOI:
381165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
381265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
381365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
381465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
381565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (void)(utf);  /* Keep compiler happy by referencing function argument */
381665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
381765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
381865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
381965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
382065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
382165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
382265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
382365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*           Check for POSIX class syntax         *
382465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
382565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
382665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when the sequence "[:" or "[." or "[=" is
382765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichencountered in a character class. It checks whether this is followed by a
382865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsequence of characters terminated by a matching ":]" or ".]" or "=]". If we
382965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreach an unescaped ']' without the special preceding character, return FALSE.
383065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
383165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOriginally, this function only recognized a sequence of letters between the
383265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichterminators, but it seems that Perl recognizes any sequence of characters,
383365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthough of course unknown POSIX names are subsequently rejected. Perl gives an
383465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
383565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdidn't consider this to be a POSIX class. Likewise for [:1234:].
383665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
383765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThe problem in trying to be exactly like Perl is in the handling of escapes. We
383865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhave to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
383965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
384065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbelow handles the special case of \], but does not try to do any other escape
384165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprocessing. This makes it different from Perl for cases such as [:l\ower:]
384265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhere Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
384365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
384465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichI think.
384565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
384665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
384765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIt seems that the appearance of a nested POSIX class supersedes an apparent
384865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexternal class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
384965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicha digit.
385065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
385165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIn Perl, unescaped square brackets may also appear as part of class names. For
385265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexample, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
385365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
385465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichseem right at all. PCRE does not allow closing square brackets in POSIX class
385565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnames.
385665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
385765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
385865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr      pointer to the initial [
385965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  endptr   where to return the end pointer
386065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
386165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:   TRUE or FALSE
386265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
386365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
386465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
386565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
386665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
386765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
386865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichterminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
386965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (++ptr; *ptr != CHAR_NULL; ptr++)
387065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
387165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
387265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ptr++;
387365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
387465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
387565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
387665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
387765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
387865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *endptr = ptr;
387965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return TRUE;
388065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
388165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
388265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
388365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr[1] == CHAR_EQUALS_SIGN) &&
388465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        check_posix_syntax(ptr, endptr))
388565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      return FALSE;
388665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
388765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
388865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE;
388965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
389065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
389165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
389265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
389365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
389465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
389565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*          Check POSIX class name                *
389665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
389765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
389865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called to check the name given in a POSIX-style class entry
389965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsuch as [:alnum:].
390065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
390165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
390265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr        points to the first letter
390365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  len        the length of the name
390465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
390565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:     a value representing the name, or -1 if unknown
390665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
390765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
390865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
390965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_posix_name(const pcre_uchar *ptr, int len)
391065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
391165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst char *pn = posix_names;
391265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister int yield = 0;
391365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (posix_name_lengths[yield] != 0)
391465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
391565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (len == posix_name_lengths[yield] &&
391665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield;
391765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pn += posix_name_lengths[yield] + 1;
391865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  yield++;
391965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
392065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn -1;
392165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
392265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
392365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
392465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
392565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Adjust OP_RECURSE items in repeated group   *
392665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
392765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* OP_RECURSE items contain an offset from the start of the regex to the group
that is referenced. This means that groups can be replicated for fixed
repetition simply by copying (because the recursion is allowed to refer to
earlier groups that are outside the current group). However, when a group is
optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
inserted before it, after it has been compiled. This means that any OP_RECURSE
items within it that refer to the group itself or any contained groups have to
have their offsets adjusted. That is one of the jobs of this function. Before
it is called, the partially compiled regex must be temporarily terminated with
OP_END.
393865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
393965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function has been extended with the possibility of forward references for
394065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrecursions and subroutine calls. It must also check the list of such references
394165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor the group we are dealing with. If it finds that one of the recursions in
394265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe current group is on this list, it adjusts the offset in the list, not the
394365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue in the reference (which is a group number).
394465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
394565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
394665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  group      points to the start of the group
394765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  adjust     the amount by which the group is to be moved
394865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
394965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd         contains pointers to tables etc.
395065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  save_hwm   the hwm forward reference pointer at the start of the group
395165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
395265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:     nothing
395365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
395465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
395565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void
395665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
395765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *save_hwm)
395865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
395965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *ptr = group;
396065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
396165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
396265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
396365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int offset;
396465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *hc;
396565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
396665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* See if this recursion is on the forward reference list. If so, adjust the
396765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reference. */
396865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
396965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
397065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
397165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    offset = (int)GET(hc, 0);
397265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (cd->start_code + offset == ptr + 1)
397365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
397465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT(hc, 0, offset + adjust);
397565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
397665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
397765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
397865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
397965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Otherwise, adjust the recursion offset if it's after the start of this
398065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  group. */
398165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
398265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (hc >= cd->hwm)
398365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
398465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    offset = (int)GET(ptr, 1);
398565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
398665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
398765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
398865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr += 1 + LINK_SIZE;
398965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
399065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
399165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
399265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
399365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
399465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
399565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Insert an automatic callout point       *
399665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
399765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
399865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert
399965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcallout points before each pattern item.
400065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
400165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
400265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code           current code pointer
400365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr            current pattern pointer
400465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd             pointers to tables etc
400565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
400665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:         new code pointer
400765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
400865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
400965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uchar *
401065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
401165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
401265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = OP_CALLOUT;
401365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = 255;
401465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
401565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, LINK_SIZE, 0);                       /* Default length */
401665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn code + 2 * LINK_SIZE;
401765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
401865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
401965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
402065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
402165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
402265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*         Complete a callout item                *
402365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
402465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
402565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* A callout item contains the length of the next item in the pattern, which
402665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwe can't fill in till after we have reached the relevant point. This is used
402765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor both automatic and manual callouts.
402865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
402965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
403065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  previous_callout   points to previous callout item
403165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr                current pattern pointer
403265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd                 pointers to tables etc
403365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
403465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:             nothing
403565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
403665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
403765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void
403865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcomplete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
403965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
404065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
404165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(previous_callout, 2 + LINK_SIZE, length);
404265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
404365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
404465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
404565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
#ifdef SUPPORT_UCP
/*************************************************
*           Get othercase range                  *
*************************************************/

/* Given the start and end of a class range (UTF mode with UCP support), this
function walks upwards through the characters looking for a run whose "other
case" values are themselves consecutive. Each call hands back the next such
run and moves the start value on, so that repeated calls step through the whole
input range. A character that has a caseless set (more than one other case) is
handed back on its own, using a distinct return value.

Arguments:
  cptr        points to starting character value; updated
  d           end value
  ocptr       where to put start of othercase range
  odptr       where to put end of othercase range

Yield:        -1 when no more (*cptr is not changed in this case)
               0 when a range is returned
              >0 the CASESET offset for char with multiple other cases
                in this case, ocptr contains the original
*/

static int
get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
  pcre_uint32 *odptr)
{
pcre_uint32 ch = *cptr;
pcre_uint32 oc_first;
pcre_uint32 oc_expected;

/* Skip characters that are their own other case. Stop at the first character
that has a different other case, or return at once for a character that has a
caseless set. */

while (ch <= d)
  {
  unsigned int caseset = UCD_CASESET(ch);
  if (caseset != 0)
    {
    *ocptr = ch;        /* Character that has the set */
    *cptr = ch + 1;     /* Rest of input range */
    return (int)caseset;
    }
  oc_first = UCD_OTHERCASE(ch);
  if (oc_first != ch) break;
  ch++;
  }

if (ch > d) return -1;  /* Ran off the end without finding anything */

/* ch has exactly one other case. Keep going for as long as each following
character has no caseless set and its other case is one more than the previous
character's other case. */

*ocptr = oc_first;
oc_expected = oc_first + 1;
ch++;

while (ch <= d && UCD_CASESET(ch) == 0 && UCD_OTHERCASE(ch) == oc_expected)
  {
  oc_expected++;
  ch++;
  }

*odptr = oc_expected - 1;   /* Last other-case value in the run */
*cptr = ch;                 /* Where the next call should start */
return 0;
}
#endif  /* SUPPORT_UCP */
411065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
411165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
411265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
411365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
411465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Add a character or range to a class     *
411565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
411665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
411765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function packages up the logic of adding a character or range of
411865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacters to a class. The character values in the arguments will be within the
411965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
412065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmutually recursive with the function immediately below.
412165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
412265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
412365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  classbits     the bit map for characters < 256
412465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  uchardptr     points to the pointer for extra data
412565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options       the options word
412665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd            contains pointers to tables etc.
412765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  start         start of range character
412865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  end           end of range character
412965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
413065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:        the number of < 256 characters added
413165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                the pointer to extra data is updated
413265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
413365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
413465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
413565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
413665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_data *cd, pcre_uint32 start, pcre_uint32 end)
413765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
413865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 c;
413965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 classbits_end = (end <= 0xff ? end : 0xff);
414065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0;
414165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
414265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If caseless matching is required, scan the range and process alternate
414365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcases. In Unicode, there are 8-bit characters that have alternate cases that
414465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare greater than 255 and vice-versa. Sometimes we can just extend the original
414565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrange. */
414665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
414765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_CASELESS) != 0)
414865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
414965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
415065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_UTF8) != 0)
415165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
415265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int rc;
415365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_uint32 oc, od;
415465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
415565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    options &= ~PCRE_CASELESS;   /* Remove for recursive calls */
415665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = start;
415765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
415865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
415965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
416065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Handle a single character that has more than one other case. */
416165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
416265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
416365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PRIV(ucd_caseless_sets) + rc, oc);
416465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
416565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Do nothing if the other case range is within the original range. */
416665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
416765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (oc >= start && od <= end) continue;
416865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
416965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Extend the original range if there is overlap, noting that if oc < c, we
417065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      can't have od > end because a subrange is always shorter than the basic
417165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      range. Otherwise, use a recursive call to add the additional range. */
417265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
417365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
417465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */
417565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
417665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
417765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
417865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
417965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif  /* SUPPORT_UCP */
418065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
418165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Not UTF-mode, or no UCP */
418265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
418365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (c = start; c <= classbits_end; c++)
418465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
418565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    SETBIT(classbits, cd->fcc[c]);
418665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    n8++;
418765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
418865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
418965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
419065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Now handle the original range. Adjust the final value according to the bit
419165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength - this means that the same lists of (e.g.) horizontal spaces can be used
419265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin all cases. */
419365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
419465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
419565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
419665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_UTF8) == 0)
419765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
419865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (end > 0xff) end = 0xff;
419965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
420065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
420165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
420265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_UTF16) == 0)
420365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
420465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (end > 0xffff) end = 0xffff;
420565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
420665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* COMPILE_PCRE[8|16] */
420765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
420865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Use the bitmap for characters < 256. Otherwise use extra data.*/
420965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
421065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (c = start; c <= classbits_end; c++)
421165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
421265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Regardless of start, c will always be <= 255. */
421365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  SETBIT(classbits, c);
421465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  n8++;
421565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
421665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
421765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
421865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (start <= 0xff) start = 0xff + 1;
421965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
422065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (end >= start)
422165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
422265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *uchardata = *uchardptr;
422365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
422465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
422565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
422665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (start < end)
422765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
422865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *uchardata++ = XCL_RANGE;
422965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      uchardata += PRIV(ord2utf)(start, uchardata);
423065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      uchardata += PRIV(ord2utf)(end, uchardata);
423165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
423265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (start == end)
423365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
423465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *uchardata++ = XCL_SINGLE;
423565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      uchardata += PRIV(ord2utf)(start, uchardata);
423665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
423765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
423865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
423965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif  /* SUPPORT_UTF */
424065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
424165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Without UTF support, character values are constrained by the bit length,
424265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  and can only be > 256 for 16-bit and 32-bit libraries. */
424365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
424465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8
424565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {}
424665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
424765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (start < end)
424865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
424965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *uchardata++ = XCL_RANGE;
425065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *uchardata++ = start;
425165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *uchardata++ = end;
425265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
425365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (start == end)
425465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
425565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *uchardata++ = XCL_SINGLE;
425665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *uchardata++ = start;
425765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
425865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
425965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
426065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *uchardptr = uchardata;   /* Updata extra data pointer */
426165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
426265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
426365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
426465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8;    /* Number of 8-bit characters */
426565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
426665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
426765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
426865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
426965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
427065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
427165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Add a list of characters to a class     *
427265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
427365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
427465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is used for adding a list of case-equivalent characters to a
427565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass, and also for adding a list of horizontal or vertical whitespace. If the
427665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist is in order (which it should be), ranges of characters are detected and
427765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhandled appropriately. This function is mutually recursive with the function
427865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichabove.
427965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
428065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
428165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  classbits     the bit map for characters < 256
428265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  uchardptr     points to the pointer for extra data
428365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options       the options word
428465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd            contains pointers to tables etc.
428565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p             points to row of 32-bit values, terminated by NOTACHAR
428665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  except        character to omit; this is used when adding lists of
428765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  case-equivalent characters to avoid including the one we
428865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  already know about
428965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
429065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:        the number of < 256 characters added
429165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                the pointer to extra data is updated
429265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
429365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
429465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
429565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
429665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_data *cd, const pcre_uint32 *p, unsigned int except)
429765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
429865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0;
429965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (p[0] < NOTACHAR)
430065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
430165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int n = 0;
430265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (p[0] != except)
430365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
430465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while(p[n+1] == p[0] + n + 1) n++;
430565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
430665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
430765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p += n + 1;
430865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
430965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8;
431065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
431165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
431265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
431365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
431465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
431565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*    Add characters not in a list to a class     *
431665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
431765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
431865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is used for adding the complement of a list of horizontal or
431965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvertical whitespace to a class. The list must be in order.
432065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
432165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
432265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  classbits     the bit map for characters < 256
432365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  uchardptr     points to the pointer for extra data
432465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options       the options word
432565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd            contains pointers to tables etc.
432665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p             points to row of 32-bit values, terminated by NOTACHAR
432765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
432865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:        the number of < 256 characters added
432965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                the pointer to extra data is updated
433065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
433165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
433265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int
433365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
433465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int options, compile_data *cd, const pcre_uint32 *p)
433565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
433665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0;
433765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0;
433865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (p[0] > 0)
433965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1);
434065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (p[0] < NOTACHAR)
434165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
434265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while (p[1] == p[0] + 1) p++;
434365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
434465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
434565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  p++;
434665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
434765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8;
434865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
434965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
435065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
435165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
435265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
435365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*           Compile one branch                   *
435465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
435565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* Scan the pattern, compiling it into a vector. If the options are
435765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchanged during the branch, the pointer is used to change the external options
435865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbits. This function is used during the pre-compile phase when we are trying
435965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto find out the amount of memory needed, as well as during the real compile
436065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichphase. The value of lengthptr distinguishes the two phases.
436165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
436265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
436365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  optionsptr        pointer to the option bits
436465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  codeptr           points to the pointer to the current code point
436565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptrptr            points to the current pattern pointer
436665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr      points to error code variable
436765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  firstcharptr      place to put the first required character
436865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  firstcharflagsptr place to put the first character flags, or a negative number
436965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqcharptr        place to put the last required character
437065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqcharflagsptr   place to put the last required character flags, or a negative number
437165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bcptr             points to current branch chain
437265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cond_depth        conditional nesting depth
437365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd                contains pointers to tables etc.
437465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  lengthptr         NULL during the real compile phase
437565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    points to length accumulator during pre-compile phase
437665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
437765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:            TRUE on success
437865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    FALSE, with *errorcodeptr set non-zero on error
437965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
438065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
438165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
438265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_branch(int *optionsptr, pcre_uchar **codeptr,
438365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uchar **ptrptr, int *errorcodeptr,
438465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
438565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
438665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  branch_chain *bcptr, int cond_depth,
438765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_data *cd, int *lengthptr)
438865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
438965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint repeat_type, op_type;
439065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
439165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint bravalue = 0;
439265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint greedy_default, greedy_non_default;
439365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar;
439465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags;
439565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 zeroreqchar, zerofirstchar;
439665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 zeroreqcharflags, zerofirstcharflags;
439765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 req_caseopt, reqvary, tempreqvary;
439865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint options = *optionsptr;               /* May change dynamically */
439965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint after_manual_callout = 0;
440065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length_prevgroup = 0;
440165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uint32 c;
440265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint escape;
440365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar *code = *codeptr;
440465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *last_code = code;
440565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *orig_code = code;
440665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *tempcode;
440765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL inescq = FALSE;
440865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL groupsetfirstchar = FALSE;
440965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr;
441065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *tempptr;
441165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *nestptr = NULL;
441265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *previous = NULL;
441365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *previous_callout = NULL;
441465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *save_hwm = NULL;
441565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint8 classbits[32];
441665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
441765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* We can fish out the UTF-8 setting once and for all into a BOOL, but we
441865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmust not do this for other options (e.g. PCRE_EXTENDED) because they may change
441965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdynamically as we process the pattern. */
442065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
442165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
442265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
442365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0;
442465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef COMPILE_PCRE32
442565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar utf_chars[6];
442665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
442765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
442865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = FALSE;
442965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
443065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
443165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Helper variables for OP_XCLASS opcode (for characters > 255). We define
443265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass_uchardata always so that it can be passed to add_to_class() always,
443365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthough it will not be used in non-UTF 8-bit cases. This avoids having to supply
443465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichalternative calls for the different cases. */
443565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
443665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *class_uchardata;
443765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
443865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL xclass;
443965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *class_uchardata_base;
444065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
444165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
444265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG
444365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (lengthptr != NULL) DPRINTF((">> start branch\n"));
444465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
444565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
444665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up the default and non-default settings for greediness */
444765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
444865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgreedy_default = ((options & PCRE_UNGREEDY) != 0);
444965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgreedy_non_default = greedy_default ^ 1;
445065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
445165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Initialize no first byte, no required byte. REQ_UNSET means "no char
445265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatching encountered yet". It gets changed to REQ_NONE if we hit something that
445365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatches a non-fixed char first char; reqchar just remains unset if we never
445465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind one.
445565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
445665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichWhen we hit a repeat whose minimum is zero, we may have to adjust these values
445765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto take the zero repeat into account. This is implemented by setting them to
zerofirstchar and zeroreqchar when such a repeat is encountered. The individual
445965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichitem types that can be repeated set these backoff variables appropriately. */
446065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
446165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar = reqchar = zerofirstchar = zeroreqchar = 0;
446265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET;
446365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
446465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The variable req_caseopt contains either the REQ_CASELESS value
446565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichor zero, according to the current setting of the caseless flag. The
REQ_CASELESS leaves the lower 28 bits empty. It is added into the
446765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar or reqchar variables to record the case status of the
446865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue. This is used only for ASCII characters. */
446965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
447065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreq_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
447165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
447265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Switch on next character until the end of the branch */
447365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
447465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;; ptr++)
447565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
447665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL negate_class;
447765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL should_flip_negation;
447865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL possessive_quantifier;
447965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL is_quantifier;
448065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL is_recurse;
448165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL reset_bracount;
448265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int class_has_8bitchar;
448365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int class_one_char;
448465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
448565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL xclass_has_prop;
448665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
448765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int newoptions;
448865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int recno;
448965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int refsign;
449065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int skipbytes;
449165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 subreqchar, subfirstchar;
449265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_int32 subreqcharflags, subfirstcharflags;
449365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int terminator;
449465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  unsigned int mclength;
449565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  unsigned int tempbracount;
449665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 ec;
449765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar mcbuffer[8];
449865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
449965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Get next character in the pattern */
450065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
450165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  c = *ptr;
450265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
450365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* If we are at the end of a nested substitution, revert to the outer level
450465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  string. Nesting only happens one level deep. */
450565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
450665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (c == CHAR_NULL && nestptr != NULL)
450765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
450865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ptr = nestptr;
450965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    nestptr = NULL;
451065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c = *ptr;
451165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
451265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
451365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* If we are in the pre-compile phase, accumulate the length used for the
451465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  previous cycle of this loop. */
451565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
451665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (lengthptr != NULL)
451765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
451865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG
451965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code > cd->hwm) cd->hwm = code;                 /* High water info */
452065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
452165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code > cd->start_workspace + cd->workspace_size -
452265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        WORK_SIZE_SAFETY_MARGIN)                       /* Check for overrun */
452365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
452465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR52;
452565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
452665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
452765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
452865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* There is at least one situation where code goes backwards: this is the
452965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
453065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the class is simply eliminated. However, it is created first, so we have to
453165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    allow memory for it. Therefore, don't ever reduce the length at this point.
453265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    */
453365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
453465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (code < last_code) code = last_code;
453565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
453665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Paranoid check for integer overflow */
453765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
453865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (OFLOW_MAX - *lengthptr < code - last_code)
453965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
454065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR20;
454165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
454265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
454365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
454465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *lengthptr += (int)(code - last_code);
454565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
454665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      (int)(code - last_code), c, c));
454765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
454865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If "previous" is set and it is not at the start of the work space, move
454965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    it back to there, in order to avoid filling up the work space. Otherwise,
455065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if "previous" is NULL, reset the current code pointer to the start. */
455165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
455265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (previous != NULL)
455365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
455465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (previous > orig_code)
455565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
455665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memmove(orig_code, previous, IN_UCHARS(code - previous));
455765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code -= previous - orig_code;
455865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous = orig_code;
455965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
456065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
456165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else code = orig_code;
456265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
456365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Remember where this code item starts so we can pick up the length
456465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    next time round. */
456565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
456665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    last_code = code;
456765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
456865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
456965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* In the real compile phase, just check the workspace used by the forward
457065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reference list. */
457165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
457265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (cd->hwm > cd->start_workspace + cd->workspace_size -
457365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           WORK_SIZE_SAFETY_MARGIN)
457465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
457565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *errorcodeptr = ERR52;
457665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto FAILED;
457765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
457865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
457965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* If in \Q...\E, check for the end; if not, we have a literal */
458065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
458165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (inescq && c != CHAR_NULL)
458265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
458365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
458465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
458565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      inescq = FALSE;
458665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
458765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      continue;
458865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
458965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
459065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
459165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (previous_callout != NULL)
459265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
459365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr == NULL)  /* Don't attempt in pre-compile phase */
459465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          complete_callout(previous_callout, ptr, cd);
459565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous_callout = NULL;
459665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
459765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((options & PCRE_AUTO_CALLOUT) != 0)
459865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
459965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous_callout = code;
460065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code = auto_callout(code, ptr, cd);
460165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
460265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto NORMAL_CHAR;
460365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
460465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Control does not reach here. */
460565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
460665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
460765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* In extended mode, skip white space and comments. We need a loop in order
460865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  to check for more white space and more comments after a comment. */
460965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
461065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_EXTENDED) != 0)
461165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
461265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for (;;)
461365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
461465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
461565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c != CHAR_NUMBER_SIGN) break;
461665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
461765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (*ptr != CHAR_NULL)
461865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
461965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
462065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {                          /* IS_NEWLINE sets cd->nllen. */
462165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += cd->nllen;
462265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
462365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
462465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
462565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
462665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf) FORWARDCHAR(ptr);
462765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
462865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
462965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = *ptr;     /* Either NULL or the char after a newline */
463065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
463165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
463265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
463365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* See if the next thing is a quantifier. */
463465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
463565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  is_quantifier =
463665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
463765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
463865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
463965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Fill in length of a previous callout, except when the next thing is a
464065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  quantifier or when processing a property substitution string in UCP mode. */
464165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
464265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
464365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       after_manual_callout-- <= 0)
464465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
464565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
464665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      complete_callout(previous_callout, ptr, cd);
464765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous_callout = NULL;
464865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
464965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
465065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Create auto callout, except for quantifiers, or while processing property
465165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  strings that are substituted for \w etc in UCP mode. */
465265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
465365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
465465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
465565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous_callout = code;
465665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code = auto_callout(code, ptr, cd);
465765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
465865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
465965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Process the next pattern item. */
466065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
466165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(c)
466265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
466365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
466465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_NULL:                /* The branch terminates at string end */
466565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_VERTICAL_LINE:       /* or | or ) */
466665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_RIGHT_PARENTHESIS:
466765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *firstcharptr = firstchar;
466865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *firstcharflagsptr = firstcharflags;
466965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *reqcharptr = reqchar;
467065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *reqcharflagsptr = reqcharflags;
467165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *codeptr = code;
467265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *ptrptr = ptr;
467365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr != NULL)
467465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
467565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (OFLOW_MAX - *lengthptr < code - last_code)
467665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
467765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR20;
467865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto FAILED;
467965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
468065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *lengthptr += (int)(code - last_code);   /* To include callout length */
468165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      DPRINTF((">> end branch\n"));
468265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
468365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return TRUE;
468465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
468565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
468665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
468765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle single-character metacharacters. In multiline mode, ^ disables
468865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the setting of any following char as a first character. */
468965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
469065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_CIRCUMFLEX_ACCENT:
469165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = NULL;
469265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_MULTILINE) != 0)
469365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
469465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags == REQ_UNSET)
469565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        zerofirstcharflags = firstcharflags = REQ_NONE;
469665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code++ = OP_CIRCM;
469765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
469865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else *code++ = OP_CIRC;
469965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
470065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
470165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_DOLLAR_SIGN:
470265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = NULL;
470365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL;
470465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
470565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
470665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* There can never be a first char if '.' is first, whatever happens about
470765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeats. The value of reqchar doesn't change either. */
470865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
470965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_DOT:
471065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
471165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstchar = firstchar;
471265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstcharflags = firstcharflags;
471365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqchar = reqchar;
471465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqcharflags = reqcharflags;
471565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = code;
471665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
471765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
471865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
471965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
472065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
472165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Character classes. If the included characters are all < 256, we build a
472265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    32-byte bitmap of the permitted characters, except in the special case
472365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    where there is only one such character. For negated classes, we build the
472465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    map as usual, then invert it at the end. However, we use a different opcode
472565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    so that data characters > 255 can be handled correctly.
472665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
472765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    If the class contains characters outside the 0-255 range, a different
472865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    opcode is compiled. It may optionally have a bit map for characters < 256,
472965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    but those above are explicitly listed afterwards. A flag byte tells
473065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    whether the bitmap is present, and whether this is a negated class or not.
473165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
473265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    In JavaScript compatibility mode, an isolated ']' causes an error. In
473365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default (Perl) mode, it is treated as a data character. */
473465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
473565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_RIGHT_SQUARE_BRACKET:
473665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
473765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
473865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR64;
473965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
474065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
474165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto NORMAL_CHAR;
474265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
474365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
474465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    used for "start of word" and "end of word". As these are otherwise illegal
474565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    sequences, we don't break anything by recognizing them. They are replaced
474665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are
474765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    erroneous and are handled by the normal code below. */
474865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
474965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_LEFT_SQUARE_BRACKET:
475065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
475165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
475265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      nestptr = ptr + 7;
475365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr = sub_start_of_word - 1;
475465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      continue;
475565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
475665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
475765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
475865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
475965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      nestptr = ptr + 7;
476065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr = sub_end_of_word - 1;
476165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      continue;
476265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
476365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
476465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle a real character class. */
476565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
476665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = code;
476765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
476865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
476965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    they are encountered at the top level, so we'll do that too. */
477065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
477165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
477265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         ptr[1] == CHAR_EQUALS_SIGN) &&
477365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        check_posix_syntax(ptr, &tempptr))
477465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
477565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
477665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
477765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
477865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
477965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If the first character is '^', set the negation flag and skip it. Also,
478065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if the first few characters (either before or after ^) are \Q\E or \E we
478165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skip them too. This makes for compatibility with Perl. */
478265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
478365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    negate_class = FALSE;
478465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for (;;)
478565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
478665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = *(++ptr);
478765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c == CHAR_BACKSLASH)
478865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
478965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (ptr[1] == CHAR_E)
479065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
479165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
479265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 3;
479365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
479465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
479565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
479665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
479765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        negate_class = TRUE;
479865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else break;
479965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
480065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
480165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
480265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    an initial ']' is taken as a data character -- the code below handles
480365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
480465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    [^] must match any character, so generate OP_ALLANY. */
480565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
480665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_RIGHT_SQUARE_BRACKET &&
480765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
480865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
480965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code++ = negate_class? OP_ALLANY : OP_FAIL;
481065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
481165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstchar = firstchar;
481265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstcharflags = firstcharflags;
481365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;
481465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
481565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
481665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If a class contains a negative special such as \S, we need to flip the
481765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    negation flag at the end, so that support for characters > 255 works
481865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    correctly (they are all included in the class). */
481965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
482065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    should_flip_negation = FALSE;
482165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
482265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Extended class (xclass) will be used when characters > 255
482365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    might match. */
482465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
482565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
482665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    xclass = FALSE;
482765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
482865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_uchardata_base = class_uchardata;   /* Save the start */
482965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
483065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
483165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* For optimization purposes, we track some properties of the class:
483265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_has_8bitchar will be non-zero if the class contains at least one <
483365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    256 character; class_one_char will be 1 if the class contains just one
483465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    character; xclass_has_prop will be TRUE if unicode property checks
483565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    are present in the class. */
483665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
483765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_has_8bitchar = 0;
483865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_one_char = 0;
483965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
484065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    xclass_has_prop = FALSE;
484165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
484265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
484365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Initialize the 32-char bit map to all zeros. We build the map in a
484465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    temporary bit of memory, in case the class contains fewer than two
484565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    8-bit characters because in that case the compiled code doesn't use the bit
484665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    map. */
484765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
484865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    memset(classbits, 0, 32 * sizeof(pcre_uint8));
484965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
485065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Process characters until ] is reached. By writing this as a "do" it
485165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    means that an initial ] is taken as a data character. At the start of the
485265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    loop, c contains the first byte of the character. */
485365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
485465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c != CHAR_NULL) do
485565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
485665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *oldptr;
485765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
485865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
485965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (utf && HAS_EXTRALEN(c))
486065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {                           /* Braces are required because the */
486165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
486265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
486365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
486465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
486565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
486665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* In the pre-compile phase, accumulate the length of any extra
486765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      data and reset the pointer. This is so that very large classes that
486865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      contain a zillion > 255 characters no longer overwrite the work space
486965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      (which is on the stack). We have to remember that there was XCLASS data,
487065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      however. */
487165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
487265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (lengthptr != NULL && class_uchardata > class_uchardata_base)
487365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
487465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        xclass = TRUE;
487565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *lengthptr += (int)(class_uchardata - class_uchardata_base);
487665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        class_uchardata = class_uchardata_base;
487765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
487865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
487965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
488065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Inside \Q...\E everything is literal except \E */
488165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
488265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (inescq)
488365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
488465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
488565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
488665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          inescq = FALSE;                   /* Reset literal state */
488765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;                            /* Skip the 'E' */
488865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          continue;                         /* Carry on with next */
488965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
489065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto CHECK_RANGE;                   /* Could be range if \E follows */
489165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
489265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
489365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Handle POSIX class names. Perl allows a negation extension of the
489465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      form [:^name:]. A square bracket that doesn't match the syntax is
489565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      treated as a literal. We also recognize the POSIX constructions
489665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
489765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      5.6 and 5.8 do. */
489865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
489965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c == CHAR_LEFT_SQUARE_BRACKET &&
490065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
490165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
490265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
490365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL local_negate = FALSE;
490465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        int posix_class, taboffset, tabopt;
490565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        register const pcre_uint8 *cbits = cd->cbits;
490665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uint8 pbits[32];
490765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
490865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (ptr[1] != CHAR_COLON)
490965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
491065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR31;
491165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
491265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
491365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
491465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr += 2;
491565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
491665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
491765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          local_negate = TRUE;
491865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          should_flip_negation = TRUE;  /* Note negative special */
491965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
492065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
492165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
492265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
492365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (posix_class < 0)
492465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
492565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR30;
492665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
492765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
492865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
492965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If matching is caseless, upper and lower are converted to
493065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        alpha. This relies on the fact that the class table starts with
493165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        alpha, lower, upper as the first 3 entries. */
493265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
493365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
493465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          posix_class = 0;
493565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
493665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* When PCRE_UCP is set, some of the POSIX classes are converted to
493765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        different escape sequences that use Unicode properties \p or \P. Others
493865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
493965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        directly. */
494065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
494165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
494265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((options & PCRE_UCP) != 0)
494365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
494465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          unsigned int ptype = 0;
494565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
494665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
494765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* The posix_substitutes table specifies which POSIX classes can be
494865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          converted to \p or \P items. */
494965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
495065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (posix_substitutes[pc] != NULL)
495165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
495265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            nestptr = tempptr + 1;
495365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr = posix_substitutes[pc] - 1;
495465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
495565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
495665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
495765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* There are three other classes that generate special property calls
495865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          that are recognized only in an XCLASS. */
495965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
496065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else switch(posix_class)
496165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
496265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case PC_GRAPH:
496365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptype = PT_PXGRAPH;
496465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Fall through */
496565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case PC_PRINT:
496665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (ptype == 0) ptype = PT_PXPRINT;
496765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Fall through */
496865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case PC_PUNCT:
496965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (ptype == 0) ptype = PT_PXPUNCT;
497065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
497165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *class_uchardata++ = ptype;
497265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *class_uchardata++ = 0;
497365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            xclass_has_prop = TRUE;
497465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr = tempptr + 1;
497565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
497665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
497765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* For all other POSIX classes, no special action is taken in UCP
497865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            mode. Fall through to the non-UCP case. */
497965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
498065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            default:
498165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
498265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
498365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
498465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
498565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the non-UCP case, or when UCP makes no difference, we build the
498665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bit map for the POSIX class in a chunk of local store because we may be
498765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        adding and subtracting from it, and we don't want to subtract bits that
498865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        may be in the main map already. At the end we or the result into the
498965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bit map that is being built. */
499065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
499165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        posix_class *= 3;
499265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
499365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Copy in the first table (always present) */
499465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
499565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memcpy(pbits, cbits + posix_class_maps[posix_class],
499665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          32 * sizeof(pcre_uint8));
499765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
499865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If there is a second table, add or remove it as required. */
499965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
500065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        taboffset = posix_class_maps[posix_class + 1];
500165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tabopt = posix_class_maps[posix_class + 2];
500265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
500365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (taboffset >= 0)
500465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
500565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (tabopt >= 0)
500665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
500765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
500865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
500965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
501065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
501165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Now see if we need to remove any special characters. An option
501265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        value of 1 removes vertical space and 2 removes underscore. */
501365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
501465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (tabopt < 0) tabopt = -tabopt;
501565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (tabopt == 1) pbits[1] &= ~0x3c;
501665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (tabopt == 2) pbits[11] &= 0x7f;
501765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
501865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Add the POSIX table or its complement into the main table that is
501965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        being built and we are done. */
502065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
502165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (local_negate)
502265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
502365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
502465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
502565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
502665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr = tempptr + 1;
502765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Every class contains at least one < 256 character. */
502865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        class_has_8bitchar = 1;
502965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Every class contains at least two characters. */
503065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        class_one_char = 2;
503165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;    /* End of POSIX syntax handling */
503265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
503365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
503465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Backslash may introduce a single character, or it may introduce one
503565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      of the specials, which just set a flag. The sequence \b is a special
503665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      case. Inside a class (and only there) it is treated as backspace. We
503765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      assume that other escapes have more than one character in them, so
503865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      speculatively set both class_has_8bitchar and class_one_char bigger
503965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      than one. Unrecognized escapes fall through and are either treated
504065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      as literal characters (by default), or are faulted if
504165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PCRE_EXTRA is set. */
504265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
504365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c == CHAR_BACKSLASH)
504465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
504565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
504665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          TRUE);
504765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*errorcodeptr != 0) goto FAILED;
504865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (escape == 0) c = ec;
504965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
505065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (escape == ESC_N)          /* \N is not supported in a class */
505165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
505265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR71;
505365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
505465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
505565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (escape == ESC_Q)            /* Handle start of quoted string */
505665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
505765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
505865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
505965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr += 2; /* avoid empty string */
506065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
506165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else inescq = TRUE;
506265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          continue;
506365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
506465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (escape == ESC_E) continue;  /* Ignore orphan \E */
506565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
506665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
506765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
506865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          register const pcre_uint8 *cbits = cd->cbits;
506965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Every class contains at least two < 256 characters. */
507065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          class_has_8bitchar++;
507165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Every class contains at least two characters. */
507265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          class_one_char += 2;
507365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
507465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          switch (escape)
507565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
507665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
507765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_du:     /* These are the values given for \d etc */
507865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_DU:     /* when PCRE_UCP is set. We replace the */
507965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_wu:     /* escape sequence with an appropriate \p */
508065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_WU:     /* or \P to test Unicode properties instead */
508165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_su:     /* of the default ASCII testing. */
508265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_SU:
508365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            nestptr = ptr;
508465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
508565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            class_has_8bitchar--;                /* Undo! */
508665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
508765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
508865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_d:
508965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
509065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
509165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
509265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_D:
509365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            should_flip_negation = TRUE;
509465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
509565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
509665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
509765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_w:
509865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
509965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
510065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
510165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_W:
510265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            should_flip_negation = TRUE;
510365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
510465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
510565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
510665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
510765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
510865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            previously set by something earlier in the character class.
510965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
511065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            we could just adjust the appropriate bit. From PCRE 8.34 we no
511165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            longer treat \s and \S specially. */
511265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
511365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_s:
511465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
511565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
511665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
511765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_S:
511865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            should_flip_negation = TRUE;
511965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
512065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
512165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
512265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* The rest apply in both UCP and non-UCP cases. */
512365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
512465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_h:
512565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (void)add_list_to_class(classbits, &class_uchardata, options, cd,
512665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PRIV(hspace_list), NOTACHAR);
512765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
512865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
512965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_H:
513065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (void)add_not_list_to_class(classbits, &class_uchardata, options,
513165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd, PRIV(hspace_list));
513265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
513365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
513465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_v:
513565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (void)add_list_to_class(classbits, &class_uchardata, options, cd,
513665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PRIV(vspace_list), NOTACHAR);
513765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
513865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
513965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_V:
514065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (void)add_not_list_to_class(classbits, &class_uchardata, options,
514165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd, PRIV(vspace_list));
514265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;
514365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
514465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
514565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_p:
514665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case ESC_P:
514765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
514865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              BOOL negated;
514965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              unsigned int ptype = 0, pdata = 0;
515065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
515165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                goto FAILED;
515265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *class_uchardata++ = ((escape == ESC_p) != negated)?
515365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                XCL_PROP : XCL_NOTPROP;
515465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *class_uchardata++ = ptype;
515565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *class_uchardata++ = pdata;
515665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              xclass_has_prop = TRUE;
515765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              class_has_8bitchar--;                /* Undo! */
515865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              continue;
515965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
516065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
516165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Unrecognized escapes are faulted if PCRE is running in its
516265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            strict mode. By default, for compatibility with Perl, they are
516365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            treated as literals. */
516465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
516565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            default:
516665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if ((options & PCRE_EXTRA) != 0)
516765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
516865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR7;
516965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
517065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
517165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            class_has_8bitchar--;    /* Undo the speculative increase. */
517265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            class_one_char -= 2;     /* Undo the speculative increase. */
517365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            c = *ptr;                /* Get the final character and fall through */
517465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
517565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
517665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
517765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
517865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Fall through if the escape just defined a single character (c >= 0).
517965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        This may be greater than 256. */
518065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
518165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        escape = 0;
518265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
518365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }   /* End of backslash handling */
518465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
518565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* A character may be followed by '-' to form a range. However, Perl does
518665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      not permit ']' to be the end of the range. A '-' character at the end is
518765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      treated as a literal. Perl ignores orphaned \E sequences entirely. The
518865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code for handling \Q and \E is messy. */
518965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
519065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      CHECK_RANGE:
519165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
519265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
519365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        inescq = FALSE;
519465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr += 2;
519565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
519665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      oldptr = ptr;
519765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
519865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Remember if \r or \n were explicitly used */
519965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
520065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
520165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
520265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Check for range */
520365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
520465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (!inescq && ptr[1] == CHAR_MINUS)
520565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
520665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uint32 d;
520765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr += 2;
520865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
520965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
521065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If we hit \Q (not followed by \E) at this point, go into escaped
521165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        mode. */
521265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
521365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
521465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
521565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 2;
521665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
521765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            { ptr += 2; continue; }
521865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          inescq = TRUE;
521965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
522065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
522165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
522265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Minus (hyphen) at the end of a class is treated as a literal, so put
522365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        back the pointer and jump to handle the character that preceded it. */
522465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
522565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
522665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
522765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr = oldptr;
522865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto CLASS_SINGLE_CHARACTER;
522965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
523065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
523165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Otherwise, we have a potential range; pick up the next character */
523265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
523365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
523465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf)
523565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {                           /* Braces are required because the */
523665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
523765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
523865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
523965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
524065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        d = *ptr;  /* Not UTF-8 mode */
524165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
524265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* The second part of a range can be a single-character escape
524365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        sequence, but not any of the other escapes. Perl treats a hyphen as a
524465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        literal in such circumstances. However, in Perl's warning mode, a
524565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        warning is given, so PCRE now faults it as it is almost certainly a
524665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        mistake on the user's part. */
524765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
524865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (!inescq)
524965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
525065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (d == CHAR_BACKSLASH)
525165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
525265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            int descape;
525365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
525465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (*errorcodeptr != 0) goto FAILED;
525565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
525665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* 0 means a character was put into d; \b is backspace; any other
525765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            special causes an error. */
525865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
525965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (descape != 0)
526065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
526165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (descape == ESC_b) d = CHAR_BS; else
526265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
526365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = ERR83;
526465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                goto FAILED;
526565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
526665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
526765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
526865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
526965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* A hyphen followed by a POSIX class is treated in the same way. */
527065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
527165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (d == CHAR_LEFT_SQUARE_BRACKET &&
527265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                   (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
527365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    ptr[1] == CHAR_EQUALS_SIGN) &&
527465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                   check_posix_syntax(ptr, &tempptr))
527565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
527665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR83;
527765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
527865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
527965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
528065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
528165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check that the two values are in the correct order. Optimize
528265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        one-character ranges. */
528365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
528465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (d < c)
528565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
528665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR8;
528765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
528865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
528965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (d == c) goto CLASS_SINGLE_CHARACTER;  /* A few lines below */
529065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
529165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* We have found a character range, so single character optimizations
529265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cannot be done anymore. Any value greater than 1 indicates that there
529365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        is more than one character. */
529465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
529565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        class_one_char = 2;
529665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
529765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Remember an explicit \r or \n, and add the range to the class. */
529865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
529965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
530065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
530165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        class_has_8bitchar +=
530265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          add_to_class(classbits, &class_uchardata, options, cd, c, d);
530365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
530465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;   /* Go get the next char in the class */
530565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
530665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
530765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Handle a single character - we can get here for a normal non-escape
530865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      char, or after \ that introduces a single character or for an apparent
530965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      range that isn't. Only the value 1 matters for class_one_char, so don't
531065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      increase it if it is already 2 or more ... just in case there's a class
531165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      with a zillion characters in it. */
531265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
531365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      CLASS_SINGLE_CHARACTER:
531465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (class_one_char < 2) class_one_char++;
531565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
531665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If class_one_char is 1, we have the first single character in the
531765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      class, and there have been no prior ranges, or XCLASS items generated by
531865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      escapes. If this is the final character in the class, we can optimize by
531965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      turning the item into a 1-character OP_CHAR[I] if it's positive, or
532065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      OP_NOT[I] if it's negative. In the positive case, it can cause firstchar
532165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      to be set. Otherwise, there can be no first char if this item is first,
532265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      whatever repeat count may follow. In the case of reqchar, save the
532365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      previous value for reinstating. */
532465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
532565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
532665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
532765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
532865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        zeroreqchar = reqchar;
532965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        zeroreqcharflags = reqcharflags;
533065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
533165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (negate_class)
533265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
533365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
533465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int d;
533565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
533665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
533765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          zerofirstchar = firstchar;
533865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          zerofirstcharflags = firstcharflags;
533965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
534065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* For caseless UTF-8 mode when UCP support is available, check
534165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          whether this character has more than one other case. If so, generate
534265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          a special OP_NOTPROP item instead of OP_NOTI. */
534365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
534465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
534565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf && (options & PCRE_CASELESS) != 0 &&
534665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              (d = UCD_CASESET(c)) != 0)
534765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
534865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = OP_NOTPROP;
534965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = PT_CLIST;
535065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = d;
535165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
535265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
535365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
535465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Char has only one other case, or UCP not available */
535565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
535665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
535765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
535865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
535965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
536065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              code += PRIV(ord2utf)(c, code);
536165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else
536265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
536365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code++ = c;
536465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
536565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
536665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* We are finished with this character class */
536765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
536865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto END_CLASS;
536965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
537065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
537165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* For a single, positive character, get the value into mcbuffer, and
537265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        then we can handle this with the normal one-character code. */
537365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
537465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
537565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
537665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          mclength = PRIV(ord2utf)(c, mcbuffer);
537765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
537865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
537965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
538065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          mcbuffer[0] = c;
538165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          mclength = 1;
538265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
538365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto ONE_CHAR;
538465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }       /* End of 1-char optimization */
538565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
538665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* There is more than one character in the class, or an XCLASS item
538765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      has been generated. Add this character to the class. */
538865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
538965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      class_has_8bitchar +=
539065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        add_to_class(classbits, &class_uchardata, options, cd, c, c);
539165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
539265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
539365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Loop until ']' reached. This "while" is the end of the "do" far above.
539465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    If we are at the end of an internal nested string, revert to the outer
539565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    string. */
539665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
539765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (((c = *(++ptr)) != CHAR_NULL ||
539865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           (nestptr != NULL &&
539965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) &&
540065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
540165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
540265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Check for missing terminating ']' */
540365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
540465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c == CHAR_NULL)
540565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
540665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR6;
540765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
540865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
540965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
541065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* We will need an XCLASS if data has been placed in class_uchardata. In
541165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the second phase this is a sufficient test. However, in the pre-compile
541265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    phase, class_uchardata gets emptied to prevent workspace overflow, so
541365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    only if the very last character in the class needs XCLASS will it contain
541465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    anything at this point. For this reason, xclass gets set TRUE above when
541565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    class_uchardata is emptied, and that's why this code is the way it is here
541665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    instead of just doing a test on class_uchardata below. */
541765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
541865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
541965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (class_uchardata > class_uchardata_base) xclass = TRUE;
542065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
542165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
542265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If this is the first thing in the branch, there can be no first char
542365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    setting, whatever the repeat count. Any reqchar setting must remain
542465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    unchanged after any kind of repeat. */
542565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
542665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
542765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstchar = firstchar;
542865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstcharflags = firstcharflags;
542965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqchar = reqchar;
543065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqcharflags = reqcharflags;
543165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
543265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If there are characters with values > 255, we have to compile an
543365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    extended class, with its own opcode, unless there was a negated special
543465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    such as \S in the class, and PCRE_UCP is not set, because in that case all
543565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    characters > 255 are in the class, so any that were explicitly given as
543665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    well can be ignored. If (when there are explicit characters > 255 that must
543765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    be listed) there are no characters < 256, we can omit the bitmap in the
543865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    actual compiled code. */
543965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
544065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
544165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
544265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif !defined COMPILE_PCRE8
544365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (xclass && !should_flip_negation)
544465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
544565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
544665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
544765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
544865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code++ = OP_XCLASS;
544965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += LINK_SIZE;
545065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code = negate_class? XCL_NOT:0;
545165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (xclass_has_prop) *code |= XCL_HASPROP;
545265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
545365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the map is required, move up the extra data to make room for it;
545465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      otherwise just move the code pointer to the end of the extra data. */
545565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
545665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (class_has_8bitchar > 0)
545765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
545865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ |= XCL_MAP;
545965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memmove(code + (32 / sizeof(pcre_uchar)), code,
546065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          IN_UCHARS(class_uchardata - code));
546165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (negate_class && !xclass_has_prop)
546265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
546365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memcpy(code, classbits, 32);
546465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code = class_uchardata + (32 / sizeof(pcre_uchar));
546565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
546665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else code = class_uchardata;
546765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
546865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Now fill in the complete length of the item */
546965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
547065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT(previous, 1, (int)(code - previous));
547165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;   /* End of class handling */
547265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
547365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
547465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
547565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If there are no characters > 255, or they are all to be included or
547665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
547765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    whole class was negated and whether there were negative specials such as \S
547865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (non-UCP) in the class. Then copy the 32-byte map into the code vector,
547965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    negating it if necessary. */
548065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
548165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
548265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr == NULL)    /* Save time in the pre-compile phase */
548365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
548465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (negate_class)
548565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
548665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      memcpy(code, classbits, 32);
548765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
548865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += 32 / sizeof(pcre_uchar);
548965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
549065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    END_CLASS:
549165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
549265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
549365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
549465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
549565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
549665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    has been tested above. */
549765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
549865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_LEFT_CURLY_BRACKET:
549965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!is_quantifier) goto NORMAL_CHAR;
550065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
550165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*errorcodeptr != 0) goto FAILED;
550265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto REPEAT;
550365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
550465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_ASTERISK:
550565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_min = 0;
550665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_max = -1;
550765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto REPEAT;
550865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
550965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_PLUS:
551065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_min = 1;
551165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_max = -1;
551265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto REPEAT;
551365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
551465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_QUESTION_MARK:
551565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_min = 0;
551665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat_max = 1;
551765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
551865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    REPEAT:
551965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (previous == NULL)
552065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
552165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR9;
552265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
552365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
552465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
552565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (repeat_min == 0)
552665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
552765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      firstchar = zerofirstchar;    /* Adjust for zero repeat */
552865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      firstcharflags = zerofirstcharflags;
552965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqchar = zeroreqchar;        /* Ditto */
553065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqcharflags = zeroreqcharflags;
553165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
553265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
553365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Remember whether this is a variable length repeat */
553465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
553565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
553665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
553765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    op_type = 0;                    /* Default single-char op codes */
553865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    possessive_quantifier = FALSE;  /* Default not possessive quantifier */
553965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
554065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Save start of previous item, in case we have to move it up in order to
554165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    insert something before it. */
554265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
554365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempcode = previous;
554465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
554565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Before checking for a possessive quantifier, we must skip over
554665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    whitespace and comments in extended mode because Perl allows white space at
554765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    this point. */
554865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
554965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((options & PCRE_EXTENDED) != 0)
555065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
555165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *p = ptr + 1;
555265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      for (;;)
555365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
555465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++;
555565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*p != CHAR_NUMBER_SIGN) break;
555665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        p++;
555765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*p != CHAR_NULL)
555865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
555965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (IS_NEWLINE(p))         /* For non-fixed-length newline cases, */
556065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {                        /* IS_NEWLINE sets cd->nllen. */
556165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            p += cd->nllen;
556265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
556365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
556465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          p++;
556565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
556665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf) FORWARDCHAR(p);
556765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
556865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }           /* Loop for comment characters */
556965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }             /* Loop for multiple comments */
557065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr = p - 1;    /* Character before the next significant one. */
557165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
557265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
557365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If the next character is '+', we have a possessive quantifier. This
557465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    implies greediness, whatever the setting of the PCRE_UNGREEDY option.
557565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    If the next character is '?' this is a minimizing repeat, by default,
557665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    but if PCRE_UNGREEDY is set, it works the other way round. We change the
557765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat type to the non-default. */
557865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
557965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[1] == CHAR_PLUS)
558065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
558165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat_type = 0;                  /* Force greedy */
558265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      possessive_quantifier = TRUE;
558365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
558465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
558565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (ptr[1] == CHAR_QUESTION_MARK)
558665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
558765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat_type = greedy_non_default;
558865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
558965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
559065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else repeat_type = greedy_default;
559165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
559265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous was a recursion call, wrap it in atomic brackets so that
559365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous becomes the atomic group. All recursions were so wrapped in the
559465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    past, but it no longer happens for non-repeated recursions. In fact, the
559565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeated ones could be re-implemented independently so as not to need this,
559665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    but for the moment we rely on the code for repeating groups. */
559765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
559865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*previous == OP_RECURSE)
559965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
560065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
560165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *previous = OP_ONCE;
560265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT(previous, 1, 2 + 2*LINK_SIZE);
560365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      previous[2 + 2*LINK_SIZE] = OP_KET;
560465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
560565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code += 2 + 2 * LINK_SIZE;
560665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      length_prevgroup = 3 + 3*LINK_SIZE;
560765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
560865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* When actually compiling, we need to check whether this was a forward
560965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reference, and if so, adjust the offset. */
561065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
561165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
561265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
561365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        int offset = GET(cd->hwm, -LINK_SIZE);
561465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (offset == previous + 1 - cd->start_code)
561565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
561665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
561765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
561865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
561965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Now handle repetition for the different types of item. */
562065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
562165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous was a character or negated character match, abolish the item
562265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    and generate a repeat item instead. If a char item has a minimum of more
562365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    than one, ensure that it is set in reqchar - it might not be if a sequence
562465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    such as x{3} is the first thing in a branch because the x will have gone
562565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    into firstchar instead.  */
562665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
562765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*previous == OP_CHAR || *previous == OP_CHARI
562865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        || *previous == OP_NOT || *previous == OP_NOTI)
562965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
563065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch (*previous)
563165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
563265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default: /* Make compiler happy. */
563365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
563465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
563565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
563665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
563765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
563865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
563965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Deal with UTF characters that take up more than one character. It's
564065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      easier to write this out separately than try to macrify it. Use c to
564165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      hold the length of the character in bytes, plus UTF_LENGTH to flag that
564265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      it's a length rather than a small character. */
564365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
564465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
564565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (utf && NOT_FIRSTCHAR(code[-1]))
564665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
564765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uchar *lastchar = code - 1;
564865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BACKCHAR(lastchar);
564965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = (int)(code - lastchar);     /* Length of UTF-8 character */
565065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
565165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c |= UTF_LENGTH;                /* Flag c as a length */
565265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
565365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
565465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UTF */
565565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
565665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Handle the case of a single character - either with no UTF support, or
565765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      with UTF disabled, or for a single character UTF character. */
565865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
565965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c = code[-1];
566065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*previous <= OP_CHARI && repeat_min > 1)
566165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
566265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqchar = c;
566365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqcharflags = req_caseopt | cd->req_varyopt;
566465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
566565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
566665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
566765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
566865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
566965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
567065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous was a character type match (\d or similar), abolish it and
567165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    create a suitable repeat item. The code is shared with single-character
567265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeats by setting op_type to add a suitable offset into repeat_type. Note
567365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    that the Unicode property types will be present only when SUPPORT_UCP is
567465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    defined, but we don't wrap the little bits of code here because it just
567565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    makes it horribly messy. */
567665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
567765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (*previous < OP_EODN)
567865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
567965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *oldcode;
568065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int prop_type, prop_value;
568165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
568265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = *previous;
568365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
568465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      OUTPUT_SINGLE_REPEAT:
568565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*previous == OP_PROP || *previous == OP_NOTPROP)
568665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
568765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        prop_type = previous[1];
568865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        prop_value = previous[2];
568965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
569065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else prop_type = prop_value = -1;
569165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
569265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      oldcode = code;
569365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code = previous;                  /* Usually overwrite previous item */
569465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
569565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the maximum is zero then the minimum must also be zero; Perl allows
569665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      this case, so we do too - by simply omitting the item altogether. */
569765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
569865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_max == 0) goto END_REPEAT;
569965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
570065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Combine the op_type with the repeat_type */
570165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
570265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat_type += op_type;
570365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
570465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* A minimum of zero is handled either as the special case * or ?, or as
570565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      an UPTO, with the maximum given. */
570665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
570765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_min == 0)
570865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
570965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
571065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
571165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
571265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
571365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_UPTO + repeat_type;
571465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT2INC(code, 0, repeat_max);
571565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
571665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
571765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
571865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* A repeat minimum of 1 is optimized into some special cases. If the
571965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
572065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      left in place and, if the maximum is greater than 1, we use OP_UPTO with
572165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      one less than the maximum. */
572265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
572365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (repeat_min == 1)
572465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
572565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max == -1)
572665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_PLUS + repeat_type;
572765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
572865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
572965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code = oldcode;                 /* leave previous item in place */
573065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (repeat_max == 1) goto END_REPEAT;
573165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_UPTO + repeat_type;
573265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT2INC(code, 0, repeat_max - 1);
573365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
573465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
573565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
573665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* The case {n,n} is just an EXACT, while the general case {n,m} is
573765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      handled as an EXACT followed by an UPTO. */
573865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
573965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
574065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
574165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_EXACT + op_type;  /* NB EXACT doesn't have repeat_type */
574265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT2INC(code, 0, repeat_min);
574365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
574465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
574565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        we have to insert the character for the previous code. For a repeated
574665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        Unicode property match, there are two extra bytes that define the
574765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        required property. In UTF-8 mode, long characters have their length in
574865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        c, with the UTF_LENGTH bit as a flag. */
574965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
575065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max < 0)
575165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
575265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
575365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf && (c & UTF_LENGTH) != 0)
575465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
575565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            memcpy(code, utf_chars, IN_UCHARS(c & 7));
575665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            code += c & 7;
575765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
575865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
575965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
576065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
576165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = c;
576265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (prop_type >= 0)
576365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
576465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code++ = prop_type;
576565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code++ = prop_value;
576665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
576765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
576865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_STAR + repeat_type;
576965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
577065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
577165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Else insert an UPTO if the max is greater than the min, again
577265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        preceded by the character, for the previously inserted code. If the
577365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        UPTO is just for 1 instance, we can use QUERY instead. */
577465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
577565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (repeat_max != repeat_min)
577665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
577765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
577865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (utf && (c & UTF_LENGTH) != 0)
577965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
578065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            memcpy(code, utf_chars, IN_UCHARS(c & 7));
578165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            code += c & 7;
578265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
578365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
578465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
578565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = c;
578665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (prop_type >= 0)
578765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
578865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = prop_type;
578965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = prop_value;
579065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
579165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          repeat_max -= repeat_min;
579265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
579365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (repeat_max == 1)
579465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
579565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = OP_QUERY + repeat_type;
579665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
579765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
579865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
579965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = OP_UPTO + repeat_type;
580065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2INC(code, 0, repeat_max);
580165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
580265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
580365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
580465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
580565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* The character or character type itself comes last in all cases. */
580665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
580765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
580865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (utf && (c & UTF_LENGTH) != 0)
580965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
581065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memcpy(code, utf_chars, IN_UCHARS(c & 7));
581165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += c & 7;
581265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
581365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
581465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
581565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code++ = c;
581665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
581765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* For a repeated Unicode property match, there are two extra bytes that
581865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      define the required property. */
581965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
582065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
582165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (prop_type >= 0)
582265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
582365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = prop_type;
582465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = prop_value;
582565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
582665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
582765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
582865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
582965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous was a character class or a back reference, we put the repeat
583065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    stuff after it, but just skip the item if the repeat was {0,0}. */
583165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
583265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
583365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
583465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             *previous == OP_XCLASS ||
583565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
583665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             *previous == OP_REF   || *previous == OP_REFI ||
583765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             *previous == OP_DNREF || *previous == OP_DNREFI)
583865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
583965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_max == 0)
584065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
584165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code = previous;
584265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto END_REPEAT;
584365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
584465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
584565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_min == 0 && repeat_max == -1)
584665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_CRSTAR + repeat_type;
584765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (repeat_min == 1 && repeat_max == -1)
584865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_CRPLUS + repeat_type;
584965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (repeat_min == 0 && repeat_max == 1)
585065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_CRQUERY + repeat_type;
585165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
585265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
585365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_CRRANGE + repeat_type;
585465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT2INC(code, 0, repeat_min);
585565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max == -1) repeat_max = 0;  /* 2-byte encoding for max */
585665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT2INC(code, 0, repeat_max);
585765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
585865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
585965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
586065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous was a bracket group, we may have to replicate it in certain
586165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cases. Note that at this point we can encounter only the "basic" bracket
586265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    opcodes such as BRA and CBRA, as this is the place where they get converted
586365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    into the more special varieties such as BRAPOS and SBRA. A test for >=
586465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
586565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND.
586665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Originally, PCRE did not allow repetition of assertions, but now it does,
586765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for Perl compatibility. */
586865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
586965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (*previous >= OP_ASSERT && *previous <= OP_COND)
587065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
587165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      register int i;
587265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int len = (int)(code - previous);
587365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *bralink = NULL;
587465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *brazeroptr = NULL;
587565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
587665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
587765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      we just ignore the repeat. */
587865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
587965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
588065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto END_REPEAT;
588165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
588265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* There is no sense in actually repeating assertions. The only potential
588365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      use of repetition is in cases when the assertion is optional. Therefore,
588465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if the minimum is greater than zero, just ignore the repeat. If the
588565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      maximum is not zero or one, set it to 1. */
588665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
588765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*previous < OP_ONCE)    /* Assertion */
588865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
588965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_min > 0) goto END_REPEAT;
589065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
589165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
589265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
589365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* The case of a zero minimum is special because of the need to stick
589465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      OP_BRAZERO in front of it, and because the group appears once in the
589565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      data, whereas in other cases it appears the minimum number of times. For
589665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      this reason, it is simplest to treat this case separately, as otherwise
589765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the code gets far too messy. There are several special subcases when the
589865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      minimum is zero. */
589965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
590065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_min == 0)
590165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
590265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the maximum is also zero, we used to just omit the group from the
590365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        output altogether, like this:
590465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
590565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ** if (repeat_max == 0)
590665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        **   {
590765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        **   code = previous;
590865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        **   goto END_REPEAT;
590965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        **   }
591065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
591165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        However, that fails when a group or a subgroup within it is referenced
591265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        as a subroutine from elsewhere in the pattern, so now we stick in
591365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        OP_SKIPZERO in front of it so that it is skipped on execution. As we
591465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        don't have a list of which groups are referenced, we cannot do this
591565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        selectively.
591665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
591765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
591865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        and do no more at this point. However, we do need to adjust any
591965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        OP_RECURSE calls inside the group that refer to the group itself or any
592065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        internal or forward referenced group, because the offset is from the
592165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        start of the whole regex. Temporarily terminate the pattern while doing
592265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        this. */
592365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
592465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
592565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
592665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code = OP_END;
592765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          adjust_recurse(previous, 1, utf, cd, save_hwm);
592865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          memmove(previous + 1, previous, IN_UCHARS(len));
592965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code++;
593065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (repeat_max == 0)
593165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
593265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *previous++ = OP_SKIPZERO;
593365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto END_REPEAT;
593465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
593565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          brazeroptr = previous;    /* Save for possessive optimizing */
593665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *previous++ = OP_BRAZERO + repeat_type;
593765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
593865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
593965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the maximum is greater than 1 and limited, we have to replicate
594065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        in a nested fashion, sticking OP_BRAZERO before each set of brackets.
594165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        The first one has to be handled carefully because it's the original
594265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        copy, which has to be moved up. The remainder can be handled by code
594365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        that is common with the non-zero minimum case below. We have to
594465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        adjust the value of repeat_max, since one less copy is required. Once
594565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        again, we may have to adjust any OP_RECURSE calls inside the group. */
594665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
594765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
594865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
594965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int offset;
595065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code = OP_END;
595165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
595265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
595365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code += 2 + LINK_SIZE;
595465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *previous++ = OP_BRAZERO + repeat_type;
595565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *previous++ = OP_BRA;
595665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
595765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* We chain together the bracket offset fields that have to be
595865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          filled in later when the ends of the brackets are reached. */
595965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
596065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          offset = (bralink == NULL)? 0 : (int)(previous - bralink);
596165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          bralink = previous;
596265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUTINC(previous, 0, offset);
596365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
596465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
596565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        repeat_max--;
596665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
596765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
596865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the minimum is greater than zero, replicate the group as many
596965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      times as necessary, and adjust the maximum to the number of subsequent
597065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      copies that we need. If we set a first char from the group, and didn't
597165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      set a required char, copy the latter from the former. If there are any
597265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      forward reference subroutine calls in the group, there will be entries on
597365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the workspace list; replicate these with an appropriate increment. */
597465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
597565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
597665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
597765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_min > 1)
597865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
597965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* In the pre-compile phase, we don't actually do the replication. We
598065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          just adjust the length as if we had. Do some paranoid checks for
598165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
598265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          integer type when available, otherwise double. */
598365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
598465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (lengthptr != NULL)
598565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
598665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            int delta = (repeat_min - 1)*length_prevgroup;
598765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if ((INT64_OR_DOUBLE)(repeat_min - 1)*
598865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  (INT64_OR_DOUBLE)length_prevgroup >
598965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    (INT64_OR_DOUBLE)INT_MAX ||
599065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                OFLOW_MAX - *lengthptr < delta)
599165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
599265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR20;
599365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
599465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
599565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *lengthptr += delta;
599665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
599765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
599865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* This is compiling for real. If there is a set first byte for
599965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          the group, and we have not yet set a "required byte", set it. Make
600065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          sure there is enough workspace for copying forward references before
600165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          doing the copy. */
600265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
600365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
600465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
600565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (groupsetfirstchar && reqcharflags < 0)
600665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
600765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              reqchar = firstchar;
600865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              reqcharflags = firstcharflags;
600965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
601065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
601165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (i = 1; i < repeat_min; i++)
601265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
601365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              pcre_uchar *hc;
601465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              pcre_uchar *this_hwm = cd->hwm;
601565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              memcpy(code, previous, IN_UCHARS(len));
601665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
601765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              while (cd->hwm > cd->start_workspace + cd->workspace_size -
601865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                     WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
601965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
602065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                size_t save_offset = save_hwm - cd->start_workspace;
602165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                size_t this_offset = this_hwm - cd->start_workspace;
602265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = expand_workspace(cd);
602365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                if (*errorcodeptr != 0) goto FAILED;
602465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
602565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
602665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
602765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
602865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
602965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
603065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                PUT(cd->hwm, 0, GET(hc, 0) + len);
603165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                cd->hwm += LINK_SIZE;
603265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
603365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              save_hwm = this_hwm;
603465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              code += len;
603565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
603665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
603765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
603865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
603965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repeat_max > 0) repeat_max -= repeat_min;
604065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
604165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
604265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* This code is common to both the zero and non-zero minimum cases. If
604365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the maximum is limited, it replicates the group in a nested fashion,
604465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      remembering the bracket starts on a stack. In the case of a zero minimum,
604565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the first one was set up above. In all cases the repeat_max now specifies
604665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the number of additional copies needed. Again, we must remember to
604765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      replicate entries on the forward reference list. */
604865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
604965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (repeat_max >= 0)
605065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
605165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the pre-compile phase, we don't actually do the replication. We
605265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        just adjust the length as if we had. For each repetition we must add 1
605365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        to the length for BRAZERO and for all but the last repetition we must
605465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        add 2 + 2*LINK_SIZE to allow for the nesting that occurs. Do some
605565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
605665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        a 64-bit integer type when available, otherwise double. */
605765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
605865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr != NULL && repeat_max > 0)
605965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
606065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
606165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      2 - 2*LINK_SIZE;   /* Last one doesn't nest */
606265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if ((INT64_OR_DOUBLE)repeat_max *
606365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
606465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  > (INT64_OR_DOUBLE)INT_MAX ||
606565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              OFLOW_MAX - *lengthptr < delta)
606665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
606765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR20;
606865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
606965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
607065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *lengthptr += delta;
607165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
607265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
607365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* This is compiling for real */
607465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
607565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else for (i = repeat_max - 1; i >= 0; i--)
607665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
607765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uchar *hc;
607865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uchar *this_hwm = cd->hwm;
607965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
608065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_BRAZERO + repeat_type;
608165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
608265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* All but the final copy start a new nesting, maintaining the
608365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          chain of brackets outstanding. */
608465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
608565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (i != 0)
608665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
608765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            int offset;
608865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = OP_BRA;
608965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            offset = (bralink == NULL)? 0 : (int)(code - bralink);
609065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            bralink = code;
609165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUTINC(code, 0, offset);
609265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
609365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
609465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          memcpy(code, previous, IN_UCHARS(len));
609565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
609665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Ensure there is enough workspace for forward references before
609765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          copying them. */
609865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
609965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while (cd->hwm > cd->start_workspace + cd->workspace_size -
610065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                 WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
610165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
610265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            size_t save_offset = save_hwm - cd->start_workspace;
610365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            size_t this_offset = this_hwm - cd->start_workspace;
610465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = expand_workspace(cd);
610565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (*errorcodeptr != 0) goto FAILED;
610665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
610765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
610865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
610965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
611065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
611165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
611265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
611365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->hwm += LINK_SIZE;
611465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
611565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          save_hwm = this_hwm;
611665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code += len;
611765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
611865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
611965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Now chain through the pending brackets, and fill in their length
612065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        fields (which are holding the chain links pro tem). */
612165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
612265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (bralink != NULL)
612365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
612465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int oldlinkoffset;
612565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int offset = (int)(code - bralink + 1);
612665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uchar *bra = code - offset;
612765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          oldlinkoffset = GET(bra, 1);
612865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
612965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_KET;
613065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUTINC(code, 0, offset);
613165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(bra, 1, offset);
613265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
613365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
613465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
613565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the maximum is unlimited, set a repeater in the final copy. For
613665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ONCE brackets, that's all we need to do. However, possessively repeated
613765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ONCE brackets can be converted into non-capturing brackets, as the
613865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
613965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      deal with possessive ONCEs specially.
614065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
614165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      Otherwise, when we are doing the actual compile phase, check to see
614265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      whether this group is one that could match an empty string. If so,
614365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
614465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      that runtime checking can be done. [This check is also applied to ONCE
614565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      groups at runtime, but in a different way.]
614665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
614765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      Then, if the quantifier was possessive and the bracket is not a
614865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      conditional, we convert the BRA code to the POS form, and the KET code to
614965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      KETRPOS. (It turns out to be convenient at runtime to detect this kind of
615065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      subpattern at both the start and at the end.) The use of special opcodes
615165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      makes it possible to reduce greatly the stack usage in pcre_exec(). If
615265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
615365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
615465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      Then, if the minimum number of matches is 1 or 0, cancel the possessive
615565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      flag so that the default action below, of wrapping everything inside
615665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      atomic brackets, does not happen. When the minimum is greater than 1,
615765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      there will be earlier copies of the group, and so we still have to wrap
615865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      the whole thing. */
615965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
616065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
616165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
616265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uchar *ketcode = code - 1 - LINK_SIZE;
616365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uchar *bracode = ketcode - GET(ketcode, 1);
616465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
616565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Convert possessive ONCE brackets to non-capturing */
616665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
616765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
616865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            possessive_quantifier) *bracode = OP_BRA;
616965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
617065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* For non-possessive ONCE brackets, all we need to do is to
617165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        set the KET. */
617265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
617365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
617465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *ketcode = OP_KETRMAX + repeat_type;
617565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
617665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Handle non-ONCE brackets and possessive ONCEs (which have been
617765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        converted to non-capturing above). */
617865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
617965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
618065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
618165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* In the compile phase, check for empty string matching. */
618265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
618365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (lengthptr == NULL)
618465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
618565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            pcre_uchar *scode = bracode;
618665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            do
618765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
618865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
618965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
619065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *bracode += OP_SBRA - OP_BRA;
619165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                break;
619265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
619365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              scode += GET(scode, 1);
619465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
619565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            while (*scode == OP_ALT);
619665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
619765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
619865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Handle possessive quantifiers. */
619965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
620065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (possessive_quantifier)
620165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
620265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* For COND brackets, we wrap the whole thing in a possessively
620365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            repeated non-capturing bracket, because we have not invented POS
620465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            versions of the COND opcodes. Because we are moving code along, we
620565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            must ensure that any pending recursive references are updated. */
620665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
620765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (*bracode == OP_COND || *bracode == OP_SCOND)
620865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
620965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              int nlen = (int)(code - bracode);
621065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code = OP_END;
621165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
621265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
621365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              code += 1 + LINK_SIZE;
621465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              nlen += 1 + LINK_SIZE;
621565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *bracode = OP_BRAPOS;
621665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code++ = OP_KETRPOS;
621765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PUTINC(code, 0, nlen);
621865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PUT(bracode, 1, nlen);
621965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
622065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
622165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
622265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
622365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else
622465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
622565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *bracode += 1;              /* Switch to xxxPOS opcodes */
622665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *ketcode = OP_KETRPOS;
622765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
622865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
622965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* If the minimum is zero, mark it as possessive, then unset the
623065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            possessive flag when the minimum is 0 or 1. */
623165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
623265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
623365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (repeat_min < 2) possessive_quantifier = FALSE;
623465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
623565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
623665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Non-possessive quantifier */
623765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
623865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else *ketcode = OP_KETRMAX + repeat_type;
623965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
624065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
624165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
624265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
624365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If previous is OP_FAIL, it was generated by an empty class [] in
624465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    JavaScript mode. The other ways in which OP_FAIL can be generated, that is
624565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
624665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    error above. We can just ignore the repeat in JS case. */
624765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
624865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (*previous == OP_FAIL) goto END_REPEAT;
624965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
625065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Else there's some kind of shambles */
625165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
625265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
625365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
625465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR11;
625565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
625665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
625765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
625865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If the character following a repeat is '+', possessive_quantifier is
625965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    TRUE. For some opcodes, there are special alternative opcodes for this
626065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case. For anything else, we wrap the entire repeated item inside OP_ONCE
626165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    brackets. Logically, the '+' notation is just syntactic sugar, taken from
626265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Sun's Java package, but the special opcodes can optimize it.
626365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
626465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Some (but not all) possessively repeated subpatterns have already been
626565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    completely handled in the code just above. For them, possessive_quantifier
626665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    is always FALSE at this stage. Note that the repeated item starts at
626765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempcode, not at previous, which might be the first part of a string whose
626865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (former) last char we repeated. */
626965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
627065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (possessive_quantifier)
627165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
627265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int len;
627365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
627465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
627565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
627665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {5,}, or {5,10}). We skip over an EXACT item; if the length of what
627765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      remains is greater than zero, there's a further opcode that can be
627865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      handled. If not, do nothing, leaving the EXACT alone. */
627965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
628065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch(*tempcode)
628165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
628265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_TYPEEXACT:
628365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempcode += PRIV(OP_lengths)[*tempcode] +
628465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ((tempcode[1 + IMM2_SIZE] == OP_PROP
628565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
628665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
628765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
628865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* CHAR opcodes are used for exacts whose count is 1. */
628965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
629065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CHAR:
629165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CHARI:
629265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOT:
629365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTI:
629465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_EXACT:
629565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_EXACTI:
629665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTEXACT:
629765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTEXACTI:
629865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempcode += PRIV(OP_lengths)[*tempcode];
629965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
630065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf && HAS_EXTRALEN(tempcode[-1]))
630165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          tempcode += GET_EXTRALEN(tempcode[-1]);
630265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
630365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
630465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
630565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* For the class opcodes, the repeat operator appears at the end;
630665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        adjust tempcode to point to it. */
630765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
630865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CLASS:
630965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NCLASS:
631065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempcode += 1 + 32/sizeof(pcre_uchar);
631165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
631265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
631365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
631465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_XCLASS:
631565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempcode += GET(tempcode, 1);
631665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
631765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
631865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
631965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
632065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If tempcode is equal to code (which points to the end of the repeated
632165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      item), it means we have skipped an EXACT item but there is no following
632265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
632365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      all other cases, tempcode will be pointing to the repeat opcode, and will
632465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      be less than code, so the value of len will be greater than 0. */
632565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
632665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      len = (int)(code - tempcode);
632765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (len > 0)
632865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
632965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        unsigned int repcode = *tempcode;
633065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
633165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* There is a table for possessifying opcodes, all of which are less
633265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        than OP_CALLOUT. A zero entry means there is no possessified version.
633365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        */
633465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
633565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
633665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *tempcode = opcode_possessify[repcode];
633765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
633865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* For opcodes without a special possessified version, wrap the item in
633965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ONCE brackets. Because we are moving code along, we must ensure that any
634065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pending recursive references are updated. */
634165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
634265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
634365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
634465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code = OP_END;
634565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
634665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
634765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code += 1 + LINK_SIZE;
634865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          len += 1 + LINK_SIZE;
634965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          tempcode[0] = OP_ONCE;
635065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_KET;
635165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUTINC(code, 0, len);
635265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(tempcode, 1, len);
635365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
635465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
635565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
635665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef NEVER
635765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (len > 0) switch (*tempcode)
635865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
635965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_STAR:  *tempcode = OP_POSSTAR; break;
636065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_PLUS:  *tempcode = OP_POSPLUS; break;
636165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_QUERY: *tempcode = OP_POSQUERY; break;
636265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_UPTO:  *tempcode = OP_POSUPTO; break;
636365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
636465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_STARI:  *tempcode = OP_POSSTARI; break;
636565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_PLUSI:  *tempcode = OP_POSPLUSI; break;
636665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_QUERYI: *tempcode = OP_POSQUERYI; break;
636765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_UPTOI:  *tempcode = OP_POSUPTOI; break;
636865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
636965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTSTAR:  *tempcode = OP_NOTPOSSTAR; break;
637065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTPLUS:  *tempcode = OP_NOTPOSPLUS; break;
637165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
637265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;
637365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
637465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTSTARI:  *tempcode = OP_NOTPOSSTARI; break;
637565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTPLUSI:  *tempcode = OP_NOTPOSPLUSI; break;
637665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break;
637765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_NOTUPTOI:  *tempcode = OP_NOTPOSUPTOI; break;
637865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
637965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_TYPESTAR:  *tempcode = OP_TYPEPOSSTAR; break;
638065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_TYPEPLUS:  *tempcode = OP_TYPEPOSPLUS; break;
638165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
638265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;
638365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
638465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CRSTAR:   *tempcode = OP_CRPOSSTAR; break;
638565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CRPLUS:   *tempcode = OP_CRPOSPLUS; break;
638665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CRQUERY:  *tempcode = OP_CRPOSQUERY; break;
638765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case OP_CRRANGE:  *tempcode = OP_CRPOSRANGE; break;
638865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
638965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Because we are moving code along, we must ensure that any
639065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pending recursive references are updated. */
639165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
639265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default:
639365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code = OP_END;
639465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
639565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
639665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += 1 + LINK_SIZE;
639765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        len += 1 + LINK_SIZE;
639865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempcode[0] = OP_ONCE;
639965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_KET;
640065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUTINC(code, 0, len);
640165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT(tempcode, 1, len);
640265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
640365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
640465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
640565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
640665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
640765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In all cases we no longer have a previous item. We also set the
640865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    "follows varying string" flag for subsequently encountered reqchars if
640965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    it isn't already set and we have just passed a varying length item. */
641065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
641165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    END_REPEAT:
641265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = NULL;
641365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->req_varyopt |= reqvary;
641465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
641565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
641665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
641765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
641865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Start of nested parenthesized sub-expression, or comment or lookahead or
641965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    lookbehind or option setting or condition or all the other extended
642065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    parenthesis forms.  */
642165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
642265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_LEFT_PARENTHESIS:
642365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    newoptions = options;
642465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skipbytes = 0;
642565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    bravalue = OP_CBRA;
642665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    save_hwm = cd->hwm;
642765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    reset_bracount = FALSE;
642865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
642965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* First deal with various "verbs" that can be introduced by '*'. */
643065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
643165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ptr++;
643265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
643365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
643465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
643565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int i, namelen;
643665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int arglen = 0;
643765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const char *vn = verbnames;
643865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *name = ptr + 1;
643965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *arg = NULL;
644065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      previous = NULL;
644165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ptr++;
644265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
644365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      namelen = (int)(ptr - name);
644465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
644565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* It appears that Perl allows any characters whatsoever, other than
644665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      a closing parenthesis, to appear in arguments, so we no longer insist on
644765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      letters, digits, and underscores. */
644865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
644965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*ptr == CHAR_COLON)
645065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
645165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        arg = ++ptr;
645265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
645365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        arglen = (int)(ptr - arg);
645465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((unsigned int)arglen > MAX_MARK)
645565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
645665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR75;
645765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
645865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
645965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
646065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
646165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (*ptr != CHAR_RIGHT_PARENTHESIS)
646265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
646365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR60;
646465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto FAILED;
646565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
646665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
646765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Scan the table of verb names */
646865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
646965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      for (i = 0; i < verbcount; i++)
647065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
647165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (namelen == verbs[i].len &&
647265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            STRNCMP_UC_C8(name, vn, namelen) == 0)
647365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
647465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int setverb;
647565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
647665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Check for open captures before ACCEPT and convert it to
647765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ASSERT_ACCEPT if in an assertion. */
647865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
647965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (verbs[i].op == OP_ACCEPT)
648065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
648165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            open_capitem *oc;
648265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (arglen != 0)
648365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
648465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR59;
648565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
648665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
648765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->had_accept = TRUE;
648865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
648965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
649065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *code++ = OP_CLOSE;
649165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PUT2INC(code, 0, oc->number);
649265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
649365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            setverb = *code++ =
649465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
649565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
649665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Do not set firstchar after *ACCEPT */
649765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
649865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
649965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
650065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Handle other cases with/without an argument */
650165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
650265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (arglen == 0)
650365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
650465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (verbs[i].op < 0)   /* Argument is mandatory */
650565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
650665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR66;
650765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
650865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
650965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            setverb = *code++ = verbs[i].op;
651065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
651165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
651265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
651365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
651465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (verbs[i].op_arg < 0)   /* Argument is forbidden */
651565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
651665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR59;
651765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
651865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
651965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            setverb = *code++ = verbs[i].op_arg;
652065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = arglen;
652165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            memcpy(code, arg, IN_UCHARS(arglen));
652265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            code += arglen;
652365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = 0;
652465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
652565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
652665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          switch (setverb)
652765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
652865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_THEN:
652965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_THEN_ARG:
653065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->external_flags |= PCRE_HASTHEN;
653165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
653265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
653365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_PRUNE:
653465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_PRUNE_ARG:
653565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_SKIP:
653665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case OP_SKIP_ARG:
653765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->had_pruneorskip = TRUE;
653865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
653965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
654065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
654165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;  /* Found verb, exit loop */
654265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
654365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
654465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        vn += verbs[i].len + 1;
654565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
654665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
654765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (i < verbcount) continue;    /* Successfully handled a verb */
654865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR60;          /* Verb not recognized */
654965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
655065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
655165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
655265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Deal with the extended parentheses; all are introduced by '?', and the
655365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    appearance of any of them means that this is not a capturing group. */
655465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
655565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (*ptr == CHAR_QUESTION_MARK)
655665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
655765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int i, set, unset, namelen;
655865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int *optset;
655965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      const pcre_uchar *name;
656065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *slot;
656165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
656265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      switch (*(++ptr))
656365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
656465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
656565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
656665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
656765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_NULL)
656865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
656965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR18;
657065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
657165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
657265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;
657365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
657465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
657565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
657665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
657765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reset_bracount = TRUE;
657865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Fall through */
657965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
658065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
658165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_COLON:          /* Non-capturing bracket */
658265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_BRA;
658365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
658465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
658565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
658665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
658765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
658865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_LEFT_PARENTHESIS:
658965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_COND;       /* Conditional group */
659065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        tempptr = ptr;
659165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
659265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* A condition can be an assertion, a number (referring to a numbered
659365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        group's having been set), a name (referring to a named group), or 'R',
659465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        referring to recursion. R<digits> and R&name are also permitted for
659565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        recursion tests.
659665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
659765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        There are several ways of testing a named group: (?(name)) is used by Python;
659865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        Perl 5.10 onwards uses (?(<name>) or (?('name')).
659965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
660065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        There is one unfortunate ambiguity, caused by history. 'R' can be the
660165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        recursive thing or the name 'R' (and similarly for 'R' followed by
660265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        digits). We look for a name first; if not found, we try the other case.
660365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
660465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        For compatibility with auto-callouts, we allow a callout to be
660565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        specified before a condition that is an assertion. First, check for the
660665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        syntax of a callout; if found, adjust the temporary pointer that is
660765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        used to check for an assertion condition. That's all that is needed! */
660865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
660965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
661065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
661165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
661265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
661365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            tempptr += i + 1;
661465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
661565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
661665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* For conditions that are assertions, check the syntax, and then exit
661765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        the switch. This will take control down to where bracketed groups,
661865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        including assertions, are processed. */
661965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
662065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (tempptr[1] == CHAR_QUESTION_MARK &&
662165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              (tempptr[2] == CHAR_EQUALS_SIGN ||
662265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich               tempptr[2] == CHAR_EXCLAMATION_MARK ||
662365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich               tempptr[2] == CHAR_LESS_THAN_SIGN))
662465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
662565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
662665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
662765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
662865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
662965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code[1+LINK_SIZE] = OP_CREF;
663065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        skipbytes = 1+IMM2_SIZE;
663165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        refsign = -1;     /* => not a number */
663265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        namelen = -1;     /* => not a name; must set to avoid warning */
663365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        name = NULL;      /* Always set to avoid warning */
663465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        recno = 0;        /* Always set to avoid warning */
663565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
663665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check for a test for recursion in a named group. */
663765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
663865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
663965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
664065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
664165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = -1;
664265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 2;
664365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code[1+LINK_SIZE] = OP_RREF;    /* Change the type of test */
664465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
664565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
664665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check for a test for a named group's having been set, using the Perl
664765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        syntax (?(<name>) or (?('name'), and also allow for the original PCRE
664865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
664965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
665065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (*ptr == CHAR_LESS_THAN_SIGN)
665165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
665265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = CHAR_GREATER_THAN_SIGN;
665365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
665465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
665565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (*ptr == CHAR_APOSTROPHE)
665665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
665765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = CHAR_APOSTROPHE;
665865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
665965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
666065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
666165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
666265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = CHAR_NULL;
666365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
666465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else if (IS_DIGIT(*ptr)) refsign = 0;
666565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
666665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
666765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Handle a number */
666865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
666965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (refsign >= 0)
667065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
667165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while (IS_DIGIT(*ptr))
667265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
667365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno = recno * 10 + (int)(*ptr - CHAR_0);
667465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr++;
667565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
667665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
667765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
667865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Otherwise we expect to read a name; anything else is an error. When
667965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        a name is one of a number of duplicates, a different opcode is used and
668065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        it needs more memory. Unfortunately we cannot tell whether a name is a
668165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        duplicate in the first pass, so we have to allow for more memory. */
668265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
668365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
668465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
668565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (IS_DIGIT(*ptr))
668665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
668765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR84;
668865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
668965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
669065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)
669165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
669265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR28;   /* Assertion expected */
669365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
669465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
669565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          name = ptr++;
669665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
669765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
669865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr++;
669965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
670065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          namelen = (int)(ptr - name);
670165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0)
670265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *lengthptr += IMM2_SIZE;
670365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
670465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
670565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check the terminator */
670665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
670765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
670865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *ptr++ != CHAR_RIGHT_PARENTHESIS)
670965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
671065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr--;                  /* Error offset */
671165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR26;  /* Malformed number or name */
671265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
671365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
671465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
671565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Do no further checking in the pre-compile phase. */
671665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
671765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr != NULL) break;
671865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
671965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the real compile we do the work of looking for the actual
672065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reference. If refsign is not negative, it means we have a number in
672165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        recno. */
672265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
672365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (refsign >= 0)
672465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
672565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (recno <= 0)
672665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
672765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR35;
672865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
672965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
673065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (refsign != 0) recno = (refsign == CHAR_MINUS)?
673165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->bracount - recno + 1 : recno + cd->bracount;
673265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (recno <= 0 || recno > cd->final_bracount)
673365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
673465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR15;
673565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
673665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
673765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT2(code, 2+LINK_SIZE, recno);
673865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
673965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
674065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
674165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Otherwise look for the name. */
674265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
674365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        slot = cd->name_table;
674465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        for (i = 0; i < cd->names_found; i++)
674565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
674665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
674765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          slot += cd->name_entry_size;
674865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
674965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
675065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Found the named subpattern. If the name is duplicated, add one to
675165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        the opcode to change CREF/RREF into DNCREF/DNRREF and insert
675265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        appropriate data values. Otherwise, just insert the unique subpattern
675365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        number. */
675465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
675565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (i < cd->names_found)
675665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
675765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int offset = i++;
675865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int count = 1;
675965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          recno = GET2(slot, 0);   /* Number from first found */
676065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (; i < cd->names_found; i++)
676165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
676265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            slot += cd->name_entry_size;
676365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0 ||
676465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              (slot+IMM2_SIZE)[namelen] != 0) break;
676565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            count++;
676665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
676765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
676865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (count > 1)
676965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
677065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2(code, 2+LINK_SIZE, offset);
677165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
677265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            skipbytes += IMM2_SIZE;
677365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            code[1+LINK_SIZE]++;
677465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
677565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else  /* Not a duplicated name */
677665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
677765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2(code, 2+LINK_SIZE, recno);
677865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
677965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
678065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
678165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If terminator == CHAR_NULL it means that the name followed directly
678265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        after the opening parenthesis [e.g. (?(abc)...] and in this case there
678365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        are some further alternatives to try. For the cases where terminator !=
678465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
678565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        we have now checked all the possibilities, so give an error. */
678665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
678765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (terminator != CHAR_NULL)
678865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
678965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR15;
679065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
679165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
679265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
679365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check for (?(R) for recursion. Allow digits after R to specify a
679465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        specific group number. */
679565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
679665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (*name == CHAR_R)
679765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
679865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          recno = 0;
679965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i = 1; i < namelen; i++)
680065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
680165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (!IS_DIGIT(name[i]))
680265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
680365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR15;
680465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
680565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
680665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno = recno * 10 + name[i] - CHAR_0;
680765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
680865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (recno == 0) recno = RREF_ANY;
680965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
681065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT2(code, 2+LINK_SIZE, recno);
681165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
681265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
681365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Similarly, check for the (?(DEFINE) "condition", which is always
681465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        false. */
681565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
681665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
681765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
681865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code[1+LINK_SIZE] = OP_DEF;
681965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          skipbytes = 1;
682065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
682165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
682265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Reference to an unidentified subpattern. */
682365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
682465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
682565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
682665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR15;
682765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
682865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
682965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
683065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
683165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
683265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
683365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
683465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_ASSERT;
683565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cd->assert_depth += 1;
683665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
683765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
683865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
683965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
684065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        thing to do, but Perl allows all assertions to be quantified, and when
684165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        they contain capturing parentheses there may be a potential use for
684265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        this feature. Not that that applies to a quantified (?!) but we allow
684365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        it for uniformity. */
684465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
684565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
684665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
684765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
684865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
684965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
685065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
685165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
685265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = OP_FAIL;
685365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          previous = NULL;
685465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          continue;
685565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
685665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_ASSERT_NOT;
685765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cd->assert_depth += 1;
685865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
685965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
686065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
686165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
686265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
686365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        switch (ptr[1])
686465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
686565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
686665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          bravalue = OP_ASSERTBACK;
686765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          cd->assert_depth += 1;
686865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 2;
686965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
687065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
687165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
687265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          bravalue = OP_ASSERTBACK_NOT;
687365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          cd->assert_depth += 1;
687465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 2;
687565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
687665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
687765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          default:                /* Could be name define, else bad */
687865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0)
687965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto DEFINE_NAME;
688065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;                  /* Correct offset for error */
688165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR24;
688265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
688365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
688465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
688565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
688665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
688765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
688865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
688965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_ONCE;
689065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
689165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
689265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
689365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
689465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
689565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_C:                 /* Callout - may be followed by digits; */
689665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous_callout = code;     /* Save for later completion */
689765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        after_manual_callout = 1;    /* Skip one item before completing */
689865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_CALLOUT;
689965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
690065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int n = 0;
690165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr++;
690265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while(IS_DIGIT(*ptr))
690365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            n = n * 10 + *ptr++ - CHAR_0;
690465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr != CHAR_RIGHT_PARENTHESIS)
690565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
690665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR39;
690765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
690865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
690965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (n > 255)
691065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
691165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR38;
691265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
691365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
691465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = n;
691565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
691665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(code, LINK_SIZE, 0);                          /* Default length */
691765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code += 2 * LINK_SIZE;
691865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
691965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous = NULL;
692065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;
692165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
692265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
692365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
692465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_P:              /* Python-style named subpattern handling */
692565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*(++ptr) == CHAR_EQUALS_SIGN ||
692665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
692765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
692865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
692965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = CHAR_RIGHT_PARENTHESIS;
693065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto NAMED_REF_OR_RECURSE;
693165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
693265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
693365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
693465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR41;
693565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
693665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
693765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Fall through to handle (?P< as (?< is handled */
693865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
693965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
694065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
694165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        DEFINE_NAME:    /* Come here from (?< handling */
694265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_APOSTROPHE:
694365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
694465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
694565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        name = ++ptr;
694665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (IS_DIGIT(*ptr))
694765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
694865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR84;   /* Group name must start with non-digit */
694965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
695065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
695165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
695265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        namelen = (int)(ptr - name);
695365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
695465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the pre-compile phase, do a syntax check, remember the longest
695565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        name, and then remember the group in a vector, expanding it if
695665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        necessary. Duplicates for the same number are skipped; other duplicates
695765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        are checked for validity. In the actual compile, there is nothing to
695865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        do. */
695965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
696065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr != NULL)
696165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
696265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          named_group *ng;
696365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uint32 number = cd->bracount + 1;
696465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
696565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr != (pcre_uchar)terminator)
696665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
696765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR42;
696865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
696965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
697065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
697165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (cd->names_found >= MAX_NAME_COUNT)
697265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
697365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR49;
697465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
697565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
697665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
697765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
697865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
697965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->name_entry_size = namelen + IMM2_SIZE + 1;
698065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (namelen > MAX_NAME_SIZE)
698165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
698265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR48;
698365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
698465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
698565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
698665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
698765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Scan the list to check for duplicates. For duplicate names, if the
698865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          number is the same, break the loop, which causes the name to be
698965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          discarded; otherwise, if DUPNAMES is not set, give an error.
699065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          If it is set, allow the name with a different number, but continue
699165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          scanning in case this is a duplicate with the same number. For
699265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          non-duplicate names, give an error if the number is duplicated. */
699365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
699465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ng = cd->named_groups;
699565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i = 0; i < cd->names_found; i++, ng++)
699665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
699765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (namelen == ng->length &&
699865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                STRNCMP_UC_UC(name, ng->name, namelen) == 0)
699965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
700065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (ng->number == number) break;
700165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if ((options & PCRE_DUPNAMES) == 0)
700265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
700365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = ERR43;
700465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                goto FAILED;
700565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
700665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd->dupnames = TRUE;  /* Duplicate names exist */
700765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
700865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else if (ng->number == number)
700965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
701065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR65;
701165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
701265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
701365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
701465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
701565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (i >= cd->names_found)     /* Not a duplicate with same number */
701665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
701765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Increase the list size if necessary */
701865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
701965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (cd->names_found >= cd->named_group_list_size)
702065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
702165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              int newsize = cd->named_group_list_size * 2;
702265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              named_group *newspace = (PUBL(malloc))
702365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                (newsize * sizeof(named_group));
702465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
702565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (newspace == NULL)
702665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
702765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = ERR21;
702865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                goto FAILED;
702965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
703065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
703165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              memcpy(newspace, cd->named_groups,
703265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                cd->named_group_list_size * sizeof(named_group));
703365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
703465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                (PUBL(free))((void *)cd->named_groups);
703565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd->named_groups = newspace;
703665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd->named_group_list_size = newsize;
703765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
703865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
703965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->named_groups[cd->names_found].name = name;
704065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->named_groups[cd->names_found].length = namelen;
704165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->named_groups[cd->names_found].number = number;
704265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->names_found++;
704365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
704465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
704565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
704665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;                    /* Move past > or ' in both passes. */
704765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto NUMBERED_GROUP;
704865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
704965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
705065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
705165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
705265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        terminator = CHAR_RIGHT_PARENTHESIS;
705365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        is_recurse = TRUE;
705465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Fall through */
705565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
705665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* We come here from the Python syntax above that handles both
705765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        references (?P=name) and recursion (?P>name), as well as falling
705865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        through from the Perl recursion syntax (?&name). We also come here from
705965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
706065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
706165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
706265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        NAMED_REF_OR_RECURSE:
706365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        name = ++ptr;
706465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (IS_DIGIT(*ptr))
706565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
706665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR84;   /* Group name must start with non-digit */
706765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
706865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
706965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
707065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        namelen = (int)(ptr - name);
707165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
707265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the pre-compile phase, do a syntax check. We used to just set
707365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        a dummy reference number, because it was not used in the first pass.
707465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        However, with the change of recursive back references to be atomic,
707565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        we have to look for the number so that this state can be identified, as
707665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        otherwise the incorrect length is computed. If it's not a backwards
707765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reference, the dummy number will do. */
707865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
707965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr != NULL)
708065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
708165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          named_group *ng;
708265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
708365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (namelen == 0)
708465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
708565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR62;
708665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
708765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
708865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr != (pcre_uchar)terminator)
708965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
709065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR42;
709165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
709265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
709365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (namelen > MAX_NAME_SIZE)
709465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
709565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR48;
709665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
709765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
709865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
709965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* The name table does not exist in the first pass; instead we must
710065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          scan the list of names encountered so far in order to get the
710165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          number. If the name is not found, set the value to 0 for a forward
710265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reference. */
710365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
710465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ng = cd->named_groups;
710565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i = 0; i < cd->names_found; i++, ng++)
710665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
710765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (namelen == ng->length &&
710865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                STRNCMP_UC_UC(name, ng->name, namelen) == 0)
710965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              break;
711065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
711165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          recno = (i < cd->names_found)? ng->number : 0;
711265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
711365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Count named back references. */
711465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
711565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (!is_recurse) cd->namedrefcount++;
711665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
711765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* If duplicate names are permitted, we have to allow for a named
711865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reference to a duplicated name (this cannot be determined until the
711965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          second pass). This needs an extra 16-bit data item. */
712065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
712165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if ((options & PCRE_DUPNAMES) != 0) *lengthptr += IMM2_SIZE;
712265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
712365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
712465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the real compile, search the name table. We check the name
712565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        first, and then check that we have reached the end of the name in the
712665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        table. That way, if the name is longer than any in the table, the
712765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        comparison will fail without reading beyond the table entry. */
712865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
712965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
713065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
713165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          slot = cd->name_table;
713265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i = 0; i < cd->names_found; i++)
713365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
713465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
713565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                slot[IMM2_SIZE+namelen] == 0)
713665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              break;
713765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            slot += cd->name_entry_size;
713865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
713965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
714065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (i < cd->names_found)
714165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
714265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno = GET2(slot, 0);
714365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
714465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
714565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
714665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR15;
714765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
714865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
714965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
715065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
715165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In both phases, for recursions, we can now go to the code that
715265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        handles numerical recursion. */
715365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
715465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (is_recurse) goto HANDLE_RECURSION;
715565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
715665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In the second pass we must see if the name is duplicated. If so, we
715765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        generate a different opcode. */
715865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
715965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (lengthptr == NULL && cd->dupnames)
716065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
716165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          int count = 1;
716265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          unsigned int index = i;
716365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          pcre_uchar *cslot = slot + cd->name_entry_size;
716465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
716565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          for (i++; i < cd->names_found; i++)
716665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
716765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
716865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
716965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
717065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            count++;
717165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cslot += cd->name_entry_size;
717265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
717365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
717465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (count > 1)
717565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
717665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
717765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            previous = code;
717865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
717965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2INC(code, 0, index);
718065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            PUT2INC(code, 0, count);
718165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
718265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Process each potentially referenced group. */
718365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
718465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            for (; slot < cslot; slot += cd->name_entry_size)
718565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
718665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              open_capitem *oc;
718765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              recno = GET2(slot, 0);
718865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              cd->backref_map |= (recno < 32)? (1 << recno) : 1;
718965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (recno > cd->top_backref) cd->top_backref = recno;
719065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
719165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              /* Check to see if this back reference is recursive, that is, it
719265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              is inside the group that it references. A flag is set so that the
719365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              group can be made atomic. */
719465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
719565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
719665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
719765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                if (oc->number == recno)
719865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  {
719965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  oc->flag = TRUE;
720065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  break;
720165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  }
720265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
720365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
720465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
720565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            continue;  /* End of back ref handling */
720665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
720765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
720865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
720965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* First pass, or a non-duplicated name. */
721065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
721165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto HANDLE_REFERENCE;
721265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
721365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
721465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
721565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_R:              /* Recursion */
721665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;                    /* Same as (?0)      */
721765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Fall through */
721865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
721965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
722065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
722165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
722265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
722365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
722465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
722565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          const pcre_uchar *called;
722665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          terminator = CHAR_RIGHT_PARENTHESIS;
722765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
722865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Come here from the \g<...> and \g'...' code (Oniguruma
722965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          compatibility). However, the syntax has been checked to ensure that
723065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          the ... are a (signed) number, so that neither ERR63 nor ERR29 will
723165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
723265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ever be taken. */
723365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
723465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          HANDLE_NUMERICAL_RECURSION:
723565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
723665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if ((refsign = *ptr) == CHAR_PLUS)
723765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
723865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr++;
723965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (!IS_DIGIT(*ptr))
724065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
724165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR63;
724265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
724365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
724465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
724565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (refsign == CHAR_MINUS)
724665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
724765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (!IS_DIGIT(ptr[1]))
724865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto OTHER_CHAR_AFTER_QUERY;
724965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            ptr++;
725065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
725165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
725265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          recno = 0;
725365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          while(IS_DIGIT(*ptr))
725465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno = recno * 10 + *ptr++ - CHAR_0;
725565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
725665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (*ptr != (pcre_uchar)terminator)
725765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
725865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *errorcodeptr = ERR29;
725965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            goto FAILED;
726065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
726165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
726265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (refsign == CHAR_MINUS)
726365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
726465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (recno == 0)
726565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
726665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR58;
726765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
726865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
726965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno = cd->bracount - recno + 1;
727065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (recno <= 0)
727165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
727265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR15;
727365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
727465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
727565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
727665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (refsign == CHAR_PLUS)
727765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
727865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (recno == 0)
727965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
728065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR58;
728165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
728265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
728365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recno += cd->bracount;
728465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
728565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
728665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Come here from code above that handles a named recursion */
728765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
728865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          HANDLE_RECURSION:
728965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
729065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          previous = code;
729165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          called = cd->start_code;
729265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
729365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* When we are actually compiling, find the bracket that is being
729465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          referenced. Temporarily end the regex in case it doesn't exist before
729565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          this point. If we end up with a forward reference, first check that
729665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          the bracket does occur later so we can give the error (and position)
729765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          now. Then remember this forward reference in the workspace so it can
729865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          be filled in at the end. */
729965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
730065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (lengthptr == NULL)
730165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
730265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *code = OP_END;
730365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (recno != 0)
730465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              called = PRIV(find_bracket)(cd->start_code, utf, recno);
730565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
730665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* Forward reference */
730765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
730865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (called == NULL)
730965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
731065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (recno > cd->final_bracount)
731165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
731265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = ERR15;
731365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                goto FAILED;
731465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
731565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
731665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              /* Fudge the value of "called" so that when it is inserted as an
731765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              offset below, what is actually inserted is the reference number
731865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              of the group. Then remember the forward reference. */
731965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
732065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              called = cd->start_code + recno;
732165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              if (cd->hwm >= cd->start_workspace + cd->workspace_size -
732265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  WORK_SIZE_SAFETY_MARGIN)
732365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                {
732465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                *errorcodeptr = expand_workspace(cd);
732565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                if (*errorcodeptr != 0) goto FAILED;
732665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                }
732765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
732865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
732965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
733065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            /* If not a forward reference, and the subpattern is still open,
733165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            this is a recursive call. We check to see if this is a left
733265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            recursion that could loop for ever, and diagnose that case. We
733365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            must not, however, do this check if we are in a conditional
733465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            subpattern because the condition might be testing for recursion in
733565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid.
733665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            Forever loops are also detected at runtime, so those that occur in
733765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            conditional subpatterns will be picked up then. */
733865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
733965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else if (GET(called, 1) == 0 && cond_depth <= 0 &&
734065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                     could_be_empty(called, code, bcptr, utf, cd))
734165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              {
734265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              *errorcodeptr = ERR40;
734365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              goto FAILED;
734465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              }
734565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
734665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
734765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Insert the recursion/subroutine item. It does not have a set first
734865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          character (relevant if it is repeated, because it will then be
734965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          wrapped with ONCE brackets). */
735065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
735165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code = OP_RECURSE;
735265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          PUT(code, 1, (int)(called - cd->start_code));
735365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          code += 1 + LINK_SIZE;
735465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          groupsetfirstchar = FALSE;
735565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
735665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
735765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Can't determine a first byte now */
735865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
735965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
736065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;
736165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
736265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
736365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* ------------------------------------------------------------ */
736465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        default:              /* Other characters: check option setting */
736565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        OTHER_CHAR_AFTER_QUERY:
736665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        set = unset = 0;
736765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        optset = &set;
736865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
736965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
737065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
737165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          switch (*ptr++)
737265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
737365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_MINUS: optset = &unset; break;
737465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
737565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_J:    /* Record that it changed in the external options */
737665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            *optset |= PCRE_DUPNAMES;
737765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->external_flags |= PCRE_JCHANGED;
737865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
737965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
738065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_i: *optset |= PCRE_CASELESS; break;
738165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_m: *optset |= PCRE_MULTILINE; break;
738265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_s: *optset |= PCRE_DOTALL; break;
738365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_x: *optset |= PCRE_EXTENDED; break;
738465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_U: *optset |= PCRE_UNGREEDY; break;
738565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            case CHAR_X: *optset |= PCRE_EXTRA; break;
738665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
738765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            default:  *errorcodeptr = ERR12;
738865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      ptr--;    /* Correct the offset */
738965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                      goto FAILED;
739065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
739165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
739265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
739365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Set up the changed option bits, but don't change anything yet. */
739465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
739565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        newoptions = (options | set) & (~unset);
739665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
739765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the options ended with ')' this is not the start of a nested
739865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        group with option changes, so the options change at this level. If this
739965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        item is right at the start of the pattern, the options can be
740065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        abstracted and made external in the pre-compile phase, and ignored in
740165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        the compile phase. This can be helpful when matching -- for instance in
740265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        caseless checking of required bytes.
740365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
740465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
740565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        definitely *not* at the start of the pattern because something has been
740665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        compiled. In the pre-compile phase, however, the code pointer can have
740765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        that value after the start, because it gets reset as code is discarded
740865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        during the pre-compile. However, this can happen only at top level - if
740965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        we are within parentheses, the starting BRA will still be present. At
741065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        any parenthesis level, the length value can be used to test if anything
741165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        has been compiled at that level. Thus, a test for both these conditions
741265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        is necessary to ensure we correctly detect the start of the pattern in
741365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        both phases.
741465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
741565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        If we are not at the pattern start, reset the greedy defaults and the
741665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        case value for firstchar and reqchar. */
741765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
741865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*ptr == CHAR_RIGHT_PARENTHESIS)
741965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
742065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (code == cd->start_code + 1 + LINK_SIZE &&
742165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich               (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
742265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
742365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            cd->external_options = newoptions;
742465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
742565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else
742665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
742765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
742865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            greedy_non_default = greedy_default ^ 1;
742965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
743065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
743165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
743265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          /* Change options at this level, and pass them back for use
743365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          in subsequent branches. */
743465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
743565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *optionsptr = options = newoptions;
743665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          previous = NULL;       /* This item can't be repeated */
743765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          continue;              /* It is complete */
743865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
743965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
744065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If the options ended with ':' we are heading into a nested group
744165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        with possible change of options. Such groups are non-capturing and are
744265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        not assertions of any kind. All we need to do is skip over the ':';
744365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        the newoptions value is handled below. */
744465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
744565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_BRA;
744665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
744765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }     /* End of switch for character following (? */
744865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }       /* End of (? handling */
744965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
745065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE
745165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    is set, all unadorned brackets become non-capturing and behave like (?:...)
745265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    brackets. */
745365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
745465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
745565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
745665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      bravalue = OP_BRA;
745765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
745865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
745965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Else we have a capturing group. */
746065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
746165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
746265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
746365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      NUMBERED_GROUP:
746465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->bracount += 1;
746565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT2(code, 1+LINK_SIZE, cd->bracount);
746665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      skipbytes = IMM2_SIZE;
746765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
746865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
746965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Process nested bracketed regex. First check for parentheses nested too
747065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    deeply. */
747165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
747265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
747365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
747465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR82;
747565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
747665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
747765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
747865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Assertions used not to be repeatable, but this was changed for Perl
747965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    compatibility, so all kinds can now be repeated. We copy code into a
748065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    non-register variable (tempcode) in order to be able to pass its address
748165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    because some compilers complain otherwise. */
748265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
748365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = code;                      /* For handling repetition */
748465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code = bravalue;
748565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempcode = code;
748665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempreqvary = cd->req_varyopt;        /* Save value before bracket */
748765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempbracount = cd->bracount;          /* Save value before bracket */
748865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    length_prevgroup = 0;                 /* Initialize for pre-compile phase */
748965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
749065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (!compile_regex(
749165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         newoptions,                      /* The complete new option state */
749265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &tempcode,                       /* Where to put code (updated) */
749365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &ptr,                            /* Input pointer (updated) */
749465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         errorcodeptr,                    /* Where to put an error message */
749565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         (bravalue == OP_ASSERTBACK ||
749665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
749765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         reset_bracount,                  /* True if (?| group */
749865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         skipbytes,                       /* Skip over bracket number */
749965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         cond_depth +
750065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
750165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &subfirstchar,                   /* For possible first char */
750265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &subfirstcharflags,
750365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &subreqchar,                     /* For possible last char */
750465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         &subreqcharflags,
750565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         bcptr,                           /* Current branch chain */
750665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         cd,                              /* Tables block */
750765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         (lengthptr == NULL)? NULL :      /* Actual compile phase */
750865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich           &length_prevgroup              /* Pre-compile phase */
750965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         ))
751065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
751165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
751265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->parens_depth -= 1;
751365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
751465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If this was an atomic group and there are no capturing groups within it,
751565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    generate OP_ONCE_NC instead of OP_ONCE. */
751665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
751765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
751865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code = OP_ONCE_NC;
751965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
752065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
752165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->assert_depth -= 1;
752265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
752365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* At the end of compiling, code is still pointing to the start of the
752465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    group, while tempcode has been updated to point past the end of the group.
752565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    The pattern pointer (ptr) is on the bracket.
752665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
752765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    If this is a conditional bracket, check that there are no more than
752865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    two branches in the group, or just one if it's a DEFINE group. We do this
752965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    in the real compile phase, not in the pre-pass, where the whole group may
753065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    not be available. */
753165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
753265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (bravalue == OP_COND && lengthptr == NULL)
753365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
753465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *tc = code;
753565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int condcount = 0;
753665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
753765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do {
753865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         condcount++;
753965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         tc += GET(tc,1);
754065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         }
754165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (*tc != OP_KET);
754265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
754365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* A DEFINE group is never obeyed inline (the "condition" is always
754465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      false). It must have only one branch. */
754565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
754665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (code[LINK_SIZE+1] == OP_DEF)
754765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
754865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (condcount > 1)
754965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
755065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR54;
755165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
755265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
755365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        bravalue = OP_DEF;   /* Just a flag to suppress char handling below */
755465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
755565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
755665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* A "normal" conditional group. If there is just one branch, we must not
755765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      make use of its firstchar or reqchar, because this is equivalent to an
755865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      empty second branch. */
755965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
756065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
756165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
756265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (condcount > 2)
756365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
756465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR27;
756565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
756665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
756765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE;
756865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
756965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
757065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
757165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Error if hit end of pattern */
757265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
757365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*ptr != CHAR_RIGHT_PARENTHESIS)
757465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
757565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *errorcodeptr = ERR14;
757665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      goto FAILED;
757765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
757865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
757965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* In the pre-compile phase, update the length by the length of the group,
758065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    less the brackets at either end. Then reduce the compiled code to just a
758165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    set of non-capturing brackets so that it doesn't use much memory if it is
758265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    duplicated by a quantifier. */
758365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
758465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr != NULL)
758565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
758665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
758765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
758865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR20;
758965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto FAILED;
759065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
759165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
759265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      code++;   /* This already contains bravalue */
759365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUTINC(code, 0, 1 + LINK_SIZE);
759465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code++ = OP_KET;
759565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUTINC(code, 0, 1 + LINK_SIZE);
759665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      break;    /* No need to waste time with special character handling */
759765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
759865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
759965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Otherwise update the main code pointer to the end of the group. */
760065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
760165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code = tempcode;
760265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
760365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* For a DEFINE group, required and first character settings are not
760465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    relevant. */
760565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
760665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (bravalue == OP_DEF) break;
760765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
760865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle updating of the required and first characters for other types of
760965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    group. Update for normal brackets of all kinds, and conditions with two
761065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branches (see code above). If the bracket is followed by a quantifier with
761165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zero repeat, we have to back off. Hence the definition of zeroreqchar and
761265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstchar outside the main loop so that they can be accessed for the
761365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    back off. */
761465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
761565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqchar = reqchar;
761665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zeroreqcharflags = reqcharflags;
761765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstchar = firstchar;
761865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    zerofirstcharflags = firstcharflags;
761965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    groupsetfirstchar = FALSE;
762065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
762165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (bravalue >= OP_ONCE)
762265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
762365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If we have not yet set a firstchar in this branch, take it from the
762465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      subpattern, remembering that it was set here so that a repeat of more
762565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      than one can replicate it as reqchar if necessary. If the subpattern has
762665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      no firstchar, set "none" for the whole branch. In both cases, a zero
762765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      repeat forces firstchar to "none". */
762865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
762965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags == REQ_UNSET)
763065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
763165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (subfirstcharflags >= 0)
763265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
763365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          firstchar = subfirstchar;
763465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          firstcharflags = subfirstcharflags;
763565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          groupsetfirstchar = TRUE;
763665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
763765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else firstcharflags = REQ_NONE;
763865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        zerofirstcharflags = REQ_NONE;
763965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
764065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
764165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If firstchar was previously set, convert the subpattern's firstchar
764265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      into reqchar if there wasn't one, using the vary flag that was in
764365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      existence beforehand. */
764465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
764565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (subfirstcharflags >= 0 && subreqcharflags < 0)
764665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
764765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        subreqchar = subfirstchar;
764865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        subreqcharflags = subfirstcharflags | tempreqvary;
764965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
765065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
765165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the subpattern set a required byte (or set a first byte that isn't
765265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      really the first byte - see above), set it. */
765365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
765465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (subreqcharflags >= 0)
765565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
765665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqchar = subreqchar;
765765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqcharflags = subreqcharflags;
765865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
765965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
766065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
766165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* For a forward assertion, we take the reqchar, if set. This can be
766265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    helpful if the pattern that follows the assertion doesn't set a different
766365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
766465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for an assertion, however, as it leads to incorrect results for patterns
766565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
766665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    of a firstchar. This is overcome by a scan at the end if there's no
766765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    firstchar, looking for an asserted first char. */
766865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
766965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (bravalue == OP_ASSERT && subreqcharflags >= 0)
767065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
767165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqchar = subreqchar;
767265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqcharflags = subreqcharflags;
767365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
767465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;     /* End of processing '(' */
767565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
767665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
767765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
767865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
767965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    are arranged to be the negation of the corresponding OP_values in the
768065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default case when PCRE_UCP is not set. For the back references, the values
768165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    are the negated reference number. Only back references and those types
768265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    that consume a character may be repeated. We can test for values between
768365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ESC_b and ESC_Z for the latter; this may have to change if any new ones are
768465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ever created. */
768565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
768665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case CHAR_BACKSLASH:
768765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tempptr = ptr;
768865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
768965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*errorcodeptr != 0) goto FAILED;
769065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
769165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (escape == 0)                  /* The escape coded a single character */
769265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = ec;
769365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
769465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
769565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (escape == ESC_Q)            /* Handle start of quoted string */
769665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
769765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
769865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr += 2;               /* avoid empty string */
769965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            else inescq = TRUE;
770065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        continue;
770165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
770265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
770365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */
770465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
770565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* For metasequences that actually match a character, we disable the
770665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      setting of a first character if it hasn't already been set. */
770765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
770865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
770965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        firstcharflags = REQ_NONE;
771065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
771165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Set values to reset to if this is followed by a zero repeat. */
771265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
771365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstchar = firstchar;
771465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstcharflags = firstcharflags;
771565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqchar = reqchar;
771665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqcharflags = reqcharflags;
771765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
771865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
771965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      is a subroutine call by number (Oniguruma syntax). In fact, the value
772065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ESC_g is returned only for these cases. So we don't need to check for <
772165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
772265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      -n, and for the Perl syntax \g{name} the result is ESC_k (as
772365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      that is a synonym for a named back reference). */
772465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
772565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (escape == ESC_g)
772665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
772765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        const pcre_uchar *p;
772865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        pcre_uint32 cf;
772965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
773065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
773165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
773265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
773365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
773465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* These two statements stop the compiler from warning about possibly
773565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
773665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        fact, because we do the check for a number below, the paths that
773765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        would actually be in error are never taken. */
773865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
773965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        skipbytes = 0;
774065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reset_bracount = FALSE;
774165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
774265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* If it's not a signed or unsigned number, treat it as a name. */
774365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
774465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cf = ptr[1];
774565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
774665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
774765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          is_recurse = TRUE;
774865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto NAMED_REF_OR_RECURSE;
774965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
775065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
775165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
775265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        or a digit. */
775365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
775465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        p = ptr + 2;
775565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        while (IS_DIGIT(*p)) p++;
775665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (*p != (pcre_uchar)terminator)
775765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
775865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR57;
775965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
776065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
776165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ptr++;
776265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto HANDLE_NUMERICAL_RECURSION;
776365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
776465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
776565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* \k<name> or \k'name' is a back reference by name (Perl syntax).
776665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      We also support \k{name} (.NET syntax).  */
776765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
776865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (escape == ESC_k)
776965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
777065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
777165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
777265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
777365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *errorcodeptr = ERR69;
777465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          break;
777565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
777665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        is_recurse = FALSE;
777765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
777865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
777965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
778065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto NAMED_REF_OR_RECURSE;
778165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
778265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
778365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Back references are handled specially; must disable firstchar if
778465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      not set to cope with cases like (?=(\w+))\1: which would otherwise set
778565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ':' later. */
778665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
778765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (escape < 0)
778865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
778965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        open_capitem *oc;
779065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        recno = -escape;
779165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
779265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Come here from named backref handling when the reference is to a
779365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        single group (i.e. not to a duplicated name). */
779465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
779565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        HANDLE_REFERENCE:
779665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
779765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous = code;
779865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
779965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT2INC(code, 0, recno);
780065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cd->backref_map |= (recno < 32)? (1 << recno) : 1;
780165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (recno > cd->top_backref) cd->top_backref = recno;
780265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
780365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* Check to see if this back reference is recursive, that is, it
780465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        is inside the group that it references. A flag is set so that the
780565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        group can be made atomic. */
780665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
780765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        for (oc = cd->open_caps; oc != NULL; oc = oc->next)
780865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
780965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (oc->number == recno)
781065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
781165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            oc->flag = TRUE;
781265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            break;
781365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
781465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
781565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
781665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
781765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* So are Unicode property matches, if supported. */
781865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
781965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
782065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (escape == ESC_P || escape == ESC_p)
782165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
782265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        BOOL negated;
782365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        unsigned int ptype = 0, pdata = 0;
782465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
782565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          goto FAILED;
782665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        previous = code;
782765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
782865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = ptype;
782965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = pdata;
783065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
783165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
783265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
783365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If Unicode properties are not supported, \X, \P, and \p are not
783465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      allowed. */
783565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
783665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
783765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
783865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR45;
783965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        goto FAILED;
784065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
784165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
784265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
784365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* For the rest (including \X when Unicode properties are supported), we
784465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      can obtain the OP value by negating the escape value in the default
784565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      situation when PCRE_UCP is not set. When it *is* set, we substitute
784665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      Unicode property tests. Note that \b and \B do a one-character
784765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      lookbehind, and \A also behaves as if it does. */
784865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
784965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
785065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
785165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
785265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             cd->max_lookbehind == 0)
785365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          cd->max_lookbehind = 1;
785465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
785565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (escape >= ESC_DU && escape <= ESC_wu)
785665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
785765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          nestptr = ptr + 1;                   /* Where to resume */
785865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
785965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
786065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
786165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
786265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
786365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        so that it works in DFA mode and in lookbehinds. */
786465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
786565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
786665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
786765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
786865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
786965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
787065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      continue;
787165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
787265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
787365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* We have a data character whose value is in c. In UTF-8 mode it may have
787465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    a value > 127. We set its representation in the length/buffer, and then
787565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    handle it as a data character. */
787665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
787765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
787865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
787965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      mclength = PRIV(ord2utf)(c, mcbuffer);
788065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
788165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
788265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
788365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
788465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     mcbuffer[0] = c;
788565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     mclength = 1;
788665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
788765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    goto ONE_CHAR;
788865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
788965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
789065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* ===================================================================*/
789165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Handle a literal character. It is guaranteed not to be whitespace or #
789265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    when the extended flag is set. If we are in a UTF mode, it may be a
789365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    multi-unit literal character. */
789465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
789565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default:
789665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    NORMAL_CHAR:
789765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    mclength = 1;
789865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    mcbuffer[0] = c;
789965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
790065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
790165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && HAS_EXTRALEN(c))
790265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
790365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
790465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
790565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* At this point we have the character's bytes in mcbuffer, and the length
790665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    in mclength. When not in UTF-8 mode, the length is always 1. */
790765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
790865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ONE_CHAR:
790965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous = code;
791065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
791165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* For caseless UTF-8 mode when UCP support is available, check whether
791265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    this character has more than one other case. If so, generate a special
791365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    OP_PROP item instead of OP_CHARI. */
791465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
791565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP
791665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf && (options & PCRE_CASELESS) != 0)
791765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
791865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      GETCHAR(c, mcbuffer);
791965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((c = UCD_CASESET(c)) != 0)
792065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
792165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = OP_PROP;
792265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = PT_CLIST;
792365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code++ = c;
792465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (firstcharflags == REQ_UNSET)
792565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          firstcharflags = zerofirstcharflags = REQ_NONE;
792665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
792765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
792865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
792965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
793065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
793165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Caseful matches, or not one of the multicase characters. */
793265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
793365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
793465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
793565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
793665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Remember if \r or \n were seen */
793765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
793865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
793965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->external_flags |= PCRE_HASCRORLF;
794065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
794165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Set the first and required bytes appropriately. If no previous first
794265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    byte, set it from this character, but revert to none on a zero repeat.
794365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Otherwise, leave the firstchar value alone, and don't change it on a zero
794465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    repeat. */
794565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
794665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (firstcharflags == REQ_UNSET)
794765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
794865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstcharflags = REQ_NONE;
794965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqchar = reqchar;
795065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqcharflags = reqcharflags;
795165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
795265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If the character is more than one byte long, we can set firstchar
795365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      only if it is not to be matched caselessly. */
795465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
795565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (mclength == 1 || req_caseopt == 0)
795665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
795765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        firstchar = mcbuffer[0] | req_caseopt;
795865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        firstchar = mcbuffer[0];
795965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        firstcharflags = req_caseopt;
796065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
796165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (mclength != 1)
796265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
796365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqchar = code[-1];
796465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqcharflags = cd->req_varyopt;
796565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
796665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
796765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else firstcharflags = reqcharflags = REQ_NONE;
796865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
796965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
797065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* firstchar was previously set; we can set reqchar only if the length is
797165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    1 or the matching is caseful. */
797265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
797365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
797465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
797565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstchar = firstchar;
797665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zerofirstcharflags = firstcharflags;
797765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqchar = reqchar;
797865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      zeroreqcharflags = reqcharflags;
797965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (mclength == 1 || req_caseopt == 0)
798065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
798165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqchar = code[-1];
798265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqcharflags = req_caseopt | cd->req_varyopt;
798365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
798465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
798565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
798665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;            /* End of literal character handling */
798765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
798865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }                   /* end of big loop */
798965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
799065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
799165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here by falling through, only by a goto for all the
799265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicherror states. Pass back the position in the pattern so that it can be displayed
799365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto the user for diagnosing the error. */
799465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
799565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichFAILED:
799665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr;
799765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE;
799865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
799965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
800065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
800165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
800265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
800365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*     Compile sequence of alternatives           *
800465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
800565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
800665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* On entry, ptr is pointing past the bracket character, but on return it
800765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpoints to the closing bracket, or vertical bar, or end of string. The code
800865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvariable is pointing at the byte into which the BRA operator has been stored.
800965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is used during the pre-compile phase when we are trying to find
801065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichout the amount of memory needed, as well as during the real compile phase. The
801165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue of lengthptr distinguishes the two phases.
801265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
801365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
801465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options           option bits, including any changes for this subpattern
801565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  codeptr           -> the address of the current code pointer
801665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptrptr            -> the address of the current pattern pointer
801765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr      -> pointer to error code variable
801865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  lookbehind        TRUE if this is a lookbehind assertion
801965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reset_bracount    TRUE to reset the count for each branch
802065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  skipbytes         skip this many bytes at start (for brackets and OP_COND)
802165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cond_depth        depth of nesting for conditional subpatterns
802265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  firstcharptr      place to put the first required character
802365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  firstcharflagsptr place to put the first character flags, or a negative number
802465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqcharptr        place to put the last required character
802565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqcharflagsptr   place to put the last required character flags, or a negative number
802665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bcptr             pointer to the chain of currently open branches
802765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd                points to the data block with tables pointers etc.
802865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  lengthptr         NULL during the real compile phase
802965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    points to length accumulator during pre-compile phase
803065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
803165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:            TRUE on success
803265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
803365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
803465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
803565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
803665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
803765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int cond_depth,
803865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
803965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
804065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  branch_chain *bcptr, compile_data *cd, int *lengthptr)
804165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
804265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr;
804365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *code = *codeptr;
804465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *last_branch = code;
804565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *start_bracket = code;
804665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *reverse_count = NULL;
804765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopen_capitem capitem;
804865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint capnumber = 0;
804965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar;
805065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags;
805165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 branchfirstchar, branchreqchar;
805265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 branchfirstcharflags, branchreqcharflags;
805365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length;
805465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int orig_bracount;
805565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int max_bracount;
805665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbranch_chain bc;
805765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
805865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If set, call the external function that checks for stack availability. */
805965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
806065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
806165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
806265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *errorcodeptr= ERR85;
806365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return FALSE;
806465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
806565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
806665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Miscellaneous initialization */
806765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
806865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbc.outer = bcptr;
806965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbc.current_branch = code;
807065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
807165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar = reqchar = 0;
807265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstcharflags = reqcharflags = REQ_UNSET;
807365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
807465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Accumulate the length for use in the pre-compile phase. Start with the
807565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength of the BRA and KET and any extra bytes that are required at the
807665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbeginning. We accumulate in a local variable to save frequent testing of
807765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlengthptr for NULL. We cannot do this by looking at the value of code at the
807865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart and end of each alternative, because compiled items are discarded during
807965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe pre-compile phase so that the work space is not exceeded. */
808065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
808165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength = 2 + 2*LINK_SIZE + skipbytes;
808265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
808365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* WARNING: If the above line is changed for any reason, you must also change
808465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe code that abstracts option settings at the start of the pattern and makes
808565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthem global. It tests the value of length for (2 + 2*LINK_SIZE) in the
808665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpre-compile phase to find out whether anything has yet been compiled or not. */
808765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
808865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If this is a capturing subpattern, add to the chain of open capturing items
808965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso that we can detect them if (*ACCEPT) is encountered. This is also used to
809065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdetect groups that contain recursive back references to themselves. Note that
809165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichonly OP_CBRA need be tested here; changing this opcode to one of its variants,
809265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kraleviche.g. OP_SCBRAPOS, happens later, after the group has been compiled. */
809365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
809465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*code == OP_CBRA)
809565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
809665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  capnumber = GET2(code, 1 + LINK_SIZE);
809765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  capitem.number = capnumber;
809865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  capitem.next = cd->open_caps;
809965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  capitem.flag = FALSE;
810065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd->open_caps = &capitem;
810165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
810265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
810365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Offset is set zero to mark that this bracket is still open */
810465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
810565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, 1, 0);
810665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode += 1 + LINK_SIZE + skipbytes;
810765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
810865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Loop for each alternative branch */
810965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
811065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichorig_bracount = max_bracount = cd->bracount;
811165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
811265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
811365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* For a (?| group, reset the capturing bracket count so that each branch
811465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  uses the same numbers. */
811565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
811665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (reset_bracount) cd->bracount = orig_bracount;
811765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
811865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Set up dummy OP_REVERSE if lookbehind assertion */
811965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
812065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (lookbehind)
812165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
812265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code++ = OP_REVERSE;
812365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    reverse_count = code;
812465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    PUTINC(code, 0, 0);
812565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    length += 1 + LINK_SIZE;
812665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
812765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
812865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Now compile the branch; in the pre-compile phase its length gets added
812965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  into the length. */
813065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
813165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
813265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,
813365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cond_depth, cd, (lengthptr == NULL)? NULL : &length))
813465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
813565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *ptrptr = ptr;
813665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return FALSE;
813765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
813865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
813965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Keep the highest bracket count in case (?| was used and some branch
814065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  has fewer than the rest. */
814165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
814265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (cd->bracount > max_bracount) max_bracount = cd->bracount;
814365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
814465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* In the real compile phase, there is some post-processing to be done. */
814565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
814665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (lengthptr == NULL)
814765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
814865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If this is the first branch, the firstchar and reqchar values for the
814965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branch become the values for the regex. */
815065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
815165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (*last_branch != OP_ALT)
815265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
815365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      firstchar = branchfirstchar;
815465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      firstcharflags = branchfirstcharflags;
815565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqchar = branchreqchar;
815665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      reqcharflags = branchreqcharflags;
815765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
815865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
815965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If this is not the first branch, the first char and reqchar have to
816065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    match the values from all the previous branches, except that if the
816165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    previous value for reqchar didn't have REQ_VARY set, it can still match,
816265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    and we set REQ_VARY for the regex. */
816365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
816465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
816565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
816665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If we previously had a firstchar, but it doesn't match the new branch,
816765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      we have to abandon the firstchar for the regex, but if there was
816865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      previously no reqchar, it takes on the value of the old firstchar. */
816965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
817065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags >= 0 &&
817165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
817265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
817365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (reqcharflags < 0)
817465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
817565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqchar = firstchar;
817665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqcharflags = firstcharflags;
817765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
817865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        firstcharflags = REQ_NONE;
817965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
818065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
818165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* If we (now or from before) have no firstchar, a firstchar from the
818265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      branch becomes a reqchar if there isn't a branch reqchar. */
818365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
818465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0)
818565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
818665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        branchreqchar = branchfirstchar;
818765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        branchreqcharflags = branchfirstcharflags;
818865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
818965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
819065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      /* Now ensure that the reqchars match */
819165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
819265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) ||
819365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          reqchar != branchreqchar)
819465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqcharflags = REQ_NONE;
819565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
819665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
819765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqchar = branchreqchar;
819865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */
819965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
820065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
820165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
820265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If lookbehind, check that this branch matches a fixed-length string, and
820365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    put the length into the OP_REVERSE item. Temporarily mark the end of the
820465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    branch with OP_END. If the branch contains OP_RECURSE, the result is -3
820565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    because there may be forward references that we can't check here. Set a
820665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    flag to cause another lookbehind check at the end. Why not do it all at the
820765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    end? Because common, erroneous checks are picked up here and the offset of
820865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    the problem can be shown. */
820965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
821065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lookbehind)
821165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
821265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int fixed_length;
821365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *code = OP_END;
821465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
821565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        FALSE, cd);
821665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      DPRINTF(("fixed length = %d\n", fixed_length));
821765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (fixed_length == -3)
821865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
821965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cd->check_lookbehind = TRUE;
822065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
822165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (fixed_length < 0)
822265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
822365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = (fixed_length == -2)? ERR36 :
822465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                        (fixed_length == -4)? ERR70: ERR25;
822565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *ptrptr = ptr;
822665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        return FALSE;
822765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
822865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else
822965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
823065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (fixed_length > cd->max_lookbehind)
823165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          cd->max_lookbehind = fixed_length;
823265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT(reverse_count, 0, fixed_length);
823365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
823465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
823565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
823665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
823765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Reached end of expression, either ')' or end of pattern. In the real
823865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile phase, go back through the alternative branches and reverse the chain
823965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  of offsets, with the field in the BRA item now becoming an offset to the
824065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  first alternative. If there are no alternatives, it points to the end of the
824165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  group. The length in the terminating ket is always the length of the whole
824265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bracketed item. Return leaving the pointer at the terminating char. */
824365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
824465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (*ptr != CHAR_VERTICAL_LINE)
824565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
824665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr == NULL)
824765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
824865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int branch_length = (int)(code - last_branch);
824965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      do
825065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
825165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        int prev_length = GET(last_branch, 1);
825265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT(last_branch, 1, branch_length);
825365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        branch_length = prev_length;
825465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        last_branch -= branch_length;
825565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
825665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (branch_length > 0);
825765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
825865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
825965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Fill in the ket */
826065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
826165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code = OP_KET;
826265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    PUT(code, 1, (int)(code - start_bracket));
826365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += 1 + LINK_SIZE;
826465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
826565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* If it was a capturing subpattern, check to see if it contained any
826665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    recursive back references. If so, we must wrap it in atomic brackets.
826765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Because we are moving code along, we must ensure that any pending recursive
826865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    references are updated. In any event, remove the block from the chain. */
826965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
827065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (capnumber > 0)
827165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
827265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (cd->open_caps->flag)
827365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
827465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code = OP_END;
827565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        adjust_recurse(start_bracket, 1 + LINK_SIZE,
827665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (options & PCRE_UTF8) != 0, cd, cd->hwm);
827765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
827865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          IN_UCHARS(code - start_bracket));
827965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *start_bracket = OP_ONCE;
828065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += 1 + LINK_SIZE;
828165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT(start_bracket, 1, (int)(code - start_bracket));
828265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *code = OP_KET;
828365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        PUT(code, 1, (int)(code - start_bracket));
828465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        code += 1 + LINK_SIZE;
828565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        length += 2 + 2*LINK_SIZE;
828665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
828765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->open_caps = cd->open_caps->next;
828865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
828965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
829065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Retain the highest bracket number, in case resetting was used. */
829165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
829265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->bracount = max_bracount;
829365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
829465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* Set values to pass back */
829565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
829665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *codeptr = code;
829765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *ptrptr = ptr;
829865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *firstcharptr = firstchar;
829965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *firstcharflagsptr = firstcharflags;
830065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *reqcharptr = reqchar;
830165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *reqcharflagsptr = reqcharflags;
830265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (lengthptr != NULL)
830365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
830465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (OFLOW_MAX - *lengthptr < length)
830565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
830665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        *errorcodeptr = ERR20;
830765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        return FALSE;
830865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
830965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *lengthptr += length;
831065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
831165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return TRUE;
831265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
831365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
831465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Another branch follows. In the pre-compile phase, we can move the code
831565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pointer back to where it was for the start of the first branch. (That is,
831665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pretend that each branch is the only one.)
831765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
831865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  In the real compile phase, insert an ALT node. Its length field points back
831965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  to the previous branch while the bracket remains open. At the end the chain
832065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  is reversed. It's done like this so that the start of the bracket has a
832165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  zero offset until it is closed, making it possible to detect recursion. */
832265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
832365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (lengthptr != NULL)
832465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
832565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code = *codeptr + 1 + LINK_SIZE + skipbytes;
832665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    length += 1 + LINK_SIZE;
832765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
832865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
832965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
833065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    *code = OP_ALT;
833165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    PUT(code, 1, (int)(code - last_branch));
833265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    bc.current_branch = last_branch = code;
833365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    code += 1 + LINK_SIZE;
833465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
833565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
833665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ptr++;
833765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
833865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here */
833965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
834065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
834165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
834265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
834365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
834465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
834565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*          Check for anchored expression         *
834665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
834765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
834865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Try to find out if this is an anchored regular expression. Consider each
834965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichalternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
835065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichall of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
835165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichit's anchored. However, if this is a multiline pattern, then only OP_SOD will
835265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbe found, because ^ generates OP_CIRCM in that mode.
835365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
835465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichWe can also consider a regex to be anchored if OP_SOM starts all its branches.
835565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis is the code for \G, which means "match at start of match position, taking
835665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinto account the match offset".
835765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
835865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA branch is also implicitly anchored if it starts with .* and DOTALL is set,
835965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause that will try the rest of the pattern at all possible matching points,
836065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso there is no point trying again.... er ....
836165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
836265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich.... except when the .* appears inside capturing parentheses, and there is a
836365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubsequent back reference to those parentheses. We haven't enough information
836465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto catch that case precisely.
836565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
836665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichAt first, the best we could do was to detect when .* was in capturing brackets
836765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand the highest back reference was greater than or equal to that level.
836865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, by keeping a bitmap of the first 31 back references, we can catch some
836965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof the more common cases more precisely.
837065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
837165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich... A second exception is when the .* appears inside an atomic group, because
837265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis prevents the number of characters it matches from being adjusted.
837365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
837465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
837565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code           points to start of expression (the bracket)
837665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bracket_map    a bitmap of which brackets we are inside while testing; this
837765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  handles up to substring 31; after that we just have to take
837865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  the less precise approach
837965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd             points to the compile data block
838065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  atomcount      atomic group level
838165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
838265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:     TRUE or FALSE
838365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
838465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
838565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
838665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_anchored(register const pcre_uchar *code, unsigned int bracket_map,
838765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_data *cd, int atomcount)
838865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
838965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo {
839065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   const pcre_uchar *scode = first_significant_code(
839165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     code + PRIV(OP_lengths)[*code], FALSE);
839265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   register int op = *scode;
839365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
839465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Non-capturing brackets */
839565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
839665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   if (op == OP_BRA  || op == OP_BRAPOS ||
839765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       op == OP_SBRA || op == OP_SBRAPOS)
839865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
839965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
840065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
840165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
840265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Capturing brackets */
840365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
840465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_CBRA  || op == OP_CBRAPOS ||
840565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            op == OP_SCBRA || op == OP_SCBRAPOS)
840665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
840765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     int n = GET2(scode, 1+LINK_SIZE);
840865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
840965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
841065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
841165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
841265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Positive forward assertions and conditions */
841365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
841465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_ASSERT || op == OP_COND)
841565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
841665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
841765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
841865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
841965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Atomic groups */
842065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
842165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_ONCE || op == OP_ONCE_NC)
842265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
842365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_anchored(scode, bracket_map, cd, atomcount + 1))
842465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return FALSE;
842565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
842665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
842765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
842865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   it isn't in brackets that are or may be referenced or inside an atomic
842965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   group. */
843065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
843165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
843265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             op == OP_TYPEPOSSTAR))
843365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
843465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 ||
843565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         atomcount > 0 || cd->had_pruneorskip)
843665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return FALSE;
843765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
843865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
843965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Check for explicit anchoring */
844065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
844165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
844265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
844365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   code += GET(code, 1);
844465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   }
844565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT);   /* Loop for each alternative */
844665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE;
844765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
844865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
844965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
845065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
845165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
845265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*         Check for starting with ^ or .*        *
845365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
845465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
845565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is called to find out if every branch starts with ^ or .* so that
845665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"first char" processing can be done to speed things up in multiline
845765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatching and for non-DOTALL patterns that start with .* (which must start at
845865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe beginning or after \n). As in the case of is_anchored() (see above), we
845965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhave to take account of back references to capturing brackets that contain .*
846065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause in that case we can't make the assumption. Also, the appearance of .*
846165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
846265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcount, because once again the assumption no longer holds.
846365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
846465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
846565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code           points to start of expression (the bracket)
846665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  bracket_map    a bitmap of which brackets we are inside while testing; this
846765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  handles up to substring 31; after that we just have to take
846865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  the less precise approach
846965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd             points to the compile data
847065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  atomcount      atomic group level
847165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
847265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:         TRUE or FALSE
847365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
847465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
847565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL
847665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_startline(const pcre_uchar *code, unsigned int bracket_map,
847765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  compile_data *cd, int atomcount)
847865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
847965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo {
848065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   const pcre_uchar *scode = first_significant_code(
848165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     code + PRIV(OP_lengths)[*code], FALSE);
848265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   register int op = *scode;
848365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
848465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* If we are at the start of a conditional assertion group, *both* the
848565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   conditional assertion *and* what follows the condition must satisfy the test
848665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   for start of line. Other kinds of condition fail. Note that there may be an
848765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   auto-callout at the start of a condition. */
848865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
848965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   if (op == OP_COND)
849065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
849165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     scode += 1 + LINK_SIZE;
849265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
849365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     switch (*scode)
849465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       {
849565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       case OP_CREF:
849665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       case OP_DNCREF:
849765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       case OP_RREF:
849865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       case OP_DNRREF:
849965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       case OP_DEF:
850065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return FALSE;
850165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
850265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       default:     /* Assertion */
850365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
850465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       do scode += GET(scode, 1); while (*scode == OP_ALT);
850565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       scode += 1 + LINK_SIZE;
850665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       break;
850765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       }
850865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     scode = first_significant_code(scode, FALSE);
850965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     op = *scode;
851065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
851165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
851265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Non-capturing brackets */
851365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
851465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   if (op == OP_BRA  || op == OP_BRAPOS ||
851565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       op == OP_SBRA || op == OP_SBRAPOS)
851665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
851765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
851865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
851965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
852065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Capturing brackets */
852165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
852265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_CBRA  || op == OP_CBRAPOS ||
852365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            op == OP_SCBRA || op == OP_SCBRAPOS)
852465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
852565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     int n = GET2(scode, 1+LINK_SIZE);
852665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
852765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;
852865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
852965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
853065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Positive forward assertions */
853165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
853265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_ASSERT)
853365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
853465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
853565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
853665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
853765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Atomic brackets */
853865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
853965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_ONCE || op == OP_ONCE_NC)
854065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
854165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;
854265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
854365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
854465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* .* means "start at start or after \n" if it isn't in atomic brackets or
854565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   brackets that may be referenced, as long as the pattern does not contain
854665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   *PRUNE or *SKIP, because these break the feature. Consider, for example,
854765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
854865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   start of a line. */
854965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
855065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
855165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
855265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
855365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich         atomcount > 0 || cd->had_pruneorskip)
855465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return FALSE;
855565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
855665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
855765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Check for explicit circumflex; anything else gives a FALSE result. Note
855865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
855965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   because the number of characters matched by .* cannot be adjusted inside
856065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   them. */
856165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
856265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
856365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
856465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   /* Move on to the next alternative */
856565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
856665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   code += GET(code, 1);
856765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   }
856865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT);  /* Loop for each alternative */
856965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE;
857065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
857165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
857265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
857365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
857465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
857565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*       Check for asserted fixed first char      *
857665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
857765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
857865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* During compilation, the "first char" settings from forward assertions are
857965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdiscarded, because they can cause conflicts with actual literals that follow.
858065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, if we end up without a first char setting for an unanchored pattern,
858165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichit is worth scanning the regex to see if there is an initial asserted first
858265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar. If all branches start with the same asserted char, or with a
858365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnon-conditional bracket all of whose alternatives start with the same asserted
858465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar (recurse ad lib), then we return that char, with the flags set to zero or
858565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichREQ_CASELESS; otherwise return zero with REQ_NONE in the flags.
858665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
858765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
858865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  code       points to start of expression (the bracket)
858965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  flags      points to the first char flags, or to REQ_NONE
859065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  inassert   TRUE if in an assertion
859165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
859265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:     the fixed first char, or 0 with REQ_NONE in flags
859365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
859465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
859565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uint32
859665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags,
859765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  BOOL inassert)
859865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
859965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uint32 c = 0;
860065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint cflags = REQ_NONE;
860165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
860265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*flags = REQ_NONE;
860365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo {
860465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   pcre_uint32 d;
860565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   int dflags;
860665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
860765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich             *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
860865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
860965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     TRUE);
861065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   register pcre_uchar op = *scode;
861165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
861265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   switch(op)
861365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     {
861465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     default:
861565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     return 0;
861665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
861765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_BRA:
861865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_BRAPOS:
861965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_CBRA:
862065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_SCBRA:
862165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_CBRAPOS:
862265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_SCBRAPOS:
862365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_ASSERT:
862465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_ONCE:
862565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_ONCE_NC:
862665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
862765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (dflags < 0)
862865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       return 0;
862965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0;
863065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     break;
863165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
863265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_EXACT:
863365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     scode += IMM2_SIZE;
863465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     /* Fall through */
863565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
863665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_CHAR:
863765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_PLUS:
863865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_MINPLUS:
863965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_POSPLUS:
864065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!inassert) return 0;
864165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (cflags < 0) { c = scode[1]; cflags = 0; }
864265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       else if (c != scode[1]) return 0;
864365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     break;
864465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
864565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_EXACTI:
864665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     scode += IMM2_SIZE;
864765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     /* Fall through */
864865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
864965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_CHARI:
865065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_PLUSI:
865165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_MINPLUSI:
865265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     case OP_POSPLUSI:
865365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (!inassert) return 0;
865465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
865565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       else if (c != scode[1]) return 0;
865665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     break;
865765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     }
865865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
865965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   code += GET(code, 1);
866065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich   }
866165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT);
866265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
866365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*flags = cflags;
866465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn c;
866565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
866665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
866765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
866865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
866965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
867065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*     Add an entry to the name/number table      *
867165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
867265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
867365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called between compiling passes to add an entry to the
867465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichname/number table, maintaining alphabetical order. Checking for permitted
867565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand forbidden duplicates has already been done.
867665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
867765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
867865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd           the compile data block
867965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  name         the name to add
868065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  length       the length of the name
868165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  groupno      the group number
868265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
868365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:       nothing
868465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
868565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
868665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void
868765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_name(compile_data *cd, const pcre_uchar *name, int length,
868865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  unsigned int groupno)
868965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
869065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i;
869165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *slot = cd->name_table;
869265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
869365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (i = 0; i < cd->names_found; i++)
869465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
869565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length));
869665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (crc == 0 && slot[IMM2_SIZE+length] != 0)
869765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    crc = -1; /* Current name is a substring */
869865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
869965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Make space in the table and break the loop for an earlier name. For a
870065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  duplicate or later name, carry on. We do this for duplicates so that in the
870165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  simple case (when ?(| is not used) they are in order of their numbers. In all
870265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cases they are in the order in which they appear in the pattern. */
870365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
870465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (crc < 0)
870565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
870665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    memmove(slot + cd->name_entry_size, slot,
870765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
870865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
870965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
871065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
871165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Continue the loop for a later or duplicate name */
871265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
871365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  slot += cd->name_entry_size;
871465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
871565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
871665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT2(slot, 0, groupno);
871765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmemcpy(slot + IMM2_SIZE, name, IN_UCHARS(length));
871865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichslot[IMM2_SIZE + length] = 0;
871965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->names_found++;
872065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
872165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
872265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
872365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
872465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
872565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*        Compile a Regular Expression            *
872665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
872765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
872865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function takes a string and returns a pointer to a block of store
872965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichholding a compiled version of the expression. The original API for this
873065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfunction had no error code return variable; it is retained for backwards
873165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompatibility. The new function is given a new name.
873265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
873365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments:
873465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pattern       the regular expression
873565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  options       various option bits
873665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcodeptr  pointer to error code variable (pcre_compile2() only)
873765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                  can be NULL if you don't want a code value
873865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorptr      pointer to pointer to error text
873965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  erroroffset   ptr offset in pattern where error was detected
874065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  tables        pointer to character tables or NULL
874165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
874265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns:        pointer to compiled data block, or NULL on error,
874365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                with errorptr and erroroffset set
874465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
874565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
/* The original entry point, kept for backwards compatibility. It has no error
code variable, so it passes NULL for that argument and hands everything else
to the matching compile2 function. One definition is built, chosen by the
code unit width being compiled. */

#if defined COMPILE_PCRE8

PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#elif defined COMPILE_PCRE16

PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#elif defined COMPILE_PCRE32

PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#endif
876865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
876965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
877065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
877165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
877265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_compile2(const char *pattern, int options, int *errorcodeptr,
877365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char **errorptr, int *erroroffset, const unsigned char *tables)
877465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
877565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
877665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
877765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char **errorptr, int *erroroffset, const unsigned char *tables)
877865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
877965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
878065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
878165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char **errorptr, int *erroroffset, const unsigned char *tables)
878265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
878365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
878465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichREAL_PCRE *re;
878565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = 1;  /* For final END opcode */
878665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags;
878765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar;
878865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 limit_match = PCRE_UINT32_MAX;
878965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 limit_recursion = PCRE_UINT32_MAX;
879065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint newline;
879165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint errorcode = 0;
879265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint skipatstart = 0;
879365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf;
879465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL never_utf = FALSE;
879565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsize_t size;
879665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *code;
879765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *codestart;
879865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr;
879965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_data compile_block;
880065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_data *cd = &compile_block;
880165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
880265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This space is used for "compiling" into during the first phase, when we are
880365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcomputing the amount of memory that is needed. Compiled items are thrown away
880465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichas soon as possible, so that a fairly large buffer should be sufficient for
880565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis purpose. The same space is used in the second phase for remembering where
880665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto fill in forward references to subpatterns. That may overflow, in which case
880765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnew memory is obtained from malloc(). */
880865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
880965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar cworkspace[COMPILE_WORK_SIZE];
881065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
881165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This vector is used for remembering name groups during the pre-compile. In a
881265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsimilar way to cworkspace, it can be expanded using malloc() if necessary. */
881365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
881465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnamed_group named_groups[NAMED_GROUP_LIST_SIZE];
881565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
881665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set this early so that early errors get offset 0. */
881765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
881865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr = (const pcre_uchar *)pattern;
881965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
882065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* We can't pass back an error message if errorptr is NULL; I guess the best we
882165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan do is just return NULL, but we can set a code value if there is a code
882265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointer. */
882365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
882465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorptr == NULL)
882565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
882665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (errorcodeptr != NULL) *errorcodeptr = 99;
882765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return NULL;
882865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
882965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
883065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorptr = NULL;
883165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcodeptr != NULL) *errorcodeptr = ERR0;
883265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
883365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* However, we can give a message when only erroroffset is NULL. */
883465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
883565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (erroroffset == NULL)
883665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
883765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR16;
883865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN2;
883965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
884065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
884165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*erroroffset = 0;
884265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
884365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up pointers to the individual character tables */
884465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
884565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (tables == NULL) tables = PRIV(default_tables);
884665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->lcc = tables + lcc_offset;
884765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->fcc = tables + fcc_offset;
884865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->cbits = tables + cbits_offset;
884965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->ctypes = tables + ctypes_offset;
885065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
885165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check that all undefined public option bits are zero */
885265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
885365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
885465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
885565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR17;
885665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
885765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
885865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
885965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If PCRE_NEVER_UTF is set, remember it. */
886065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
886165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
886265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
886365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check for global one-time settings at the start of the pattern, and remember
886465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe offset for later. */
886565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
886665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->external_flags = 0;   /* Initialize here for LIMIT_MATCH/RECURSION */
886765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
886865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
886965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       ptr[skipatstart+1] == CHAR_ASTERISK)
887065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
887165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int newnl = 0;
887265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int newbsr = 0;
887365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
887465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* For completeness and backward compatibility, (*UTFn) is supported in the
887565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrelevant libraries, but (*UTF) is generic and always supported. Note that
887665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
887765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
887865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8
887965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
888065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 7; options |= PCRE_UTF8; continue; }
888165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
888265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE16
888365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0)
888465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 8; options |= PCRE_UTF16; continue; }
888565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
888665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32
888765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0)
888865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 8; options |= PCRE_UTF32; continue; }
888965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
889065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
889165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0)
889265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 6; options |= PCRE_UTF8; continue; }
889365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
889465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 6; options |= PCRE_UCP; continue; }
889565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_AUTO_POSSESS_RIGHTPAR, 16) == 0)
889665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 18; options |= PCRE_NO_AUTO_POSSESS; continue; }
889765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
889865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
889965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
890065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
890165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
890265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_uint32 c = 0;
890365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int p = skipatstart + 14;
890465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (isdigit(ptr[p]))
890565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
890665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow */
890765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = c*10 + ptr[p++] - CHAR_0;
890865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
890965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
891065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c < limit_match)
891165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
891265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      limit_match = c;
891365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->external_flags |= PCRE_MLSET;
891465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
891565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skipatstart = p;
891665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
891765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
891865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
891965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
892065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
892165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_uint32 c = 0;
892265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int p = skipatstart + 18;
892365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    while (isdigit(ptr[p]))
892465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
892565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow check */
892665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      c = c*10 + ptr[p++] - CHAR_0;
892765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
892865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
892965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (c < limit_recursion)
893065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
893165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      limit_recursion = c;
893265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      cd->external_flags |= PCRE_RLSET;
893365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
893465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    skipatstart = p;
893565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;
893665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
893765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
893865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
893965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
894065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
894165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
894265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
894365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
894465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
894565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
894665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
894765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
894865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
894965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
895065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
895165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
895265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
895365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
895465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (newnl != 0)
895565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    options = (options & ~PCRE_NEWLINE_BITS) | newnl;
895665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else if (newbsr != 0)
895765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
895865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else break;
895965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
896065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
896165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
896265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichutf = (options & PCRE_UTF8) != 0;
896365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf && never_utf)
896465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
896565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR78;
896665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN2;
896765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
896865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
896965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Can't support UTF unless PCRE has been compiled to include the code. The
897065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn of an error code from PRIV(valid_utf)() is a new feature, introduced in
897165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrelease 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
897265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnot used here. */
897365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
897465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF
897565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
897665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
897765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
897865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
897965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR44;
898065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
898165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR74;
898265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
898365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR77;
898465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
898565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN2;
898665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
898765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
898865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf)
898965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
899065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR32;
899165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
899265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
899365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
899465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
899565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Can't support UCP unless PCRE has been compiled to include the code. */
899665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
899765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef SUPPORT_UCP
899865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_UCP) != 0)
899965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
900065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR67;
900165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
900265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
900365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
900465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
900565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check validity of \R options. */
900665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
900765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
900865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
900965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
901065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR56;
901165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
901265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
901365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
901465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Handle different types of newline. The three bits give seven cases. The
901565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcurrent code allows for fixed one- or two-byte sequences, plus "any" and
901665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"anycrlf". */
901765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
901865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichswitch (options & PCRE_NEWLINE_BITS)
901965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
902065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case 0: newline = NEWLINE; break;   /* Build-time default */
902165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
902265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
902365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case PCRE_NEWLINE_CR+
902465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
902565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case PCRE_NEWLINE_ANY: newline = -1; break;
902665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
902765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
902865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
902965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
903065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newline == -2)
903165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
903265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd->nltype = NLTYPE_ANYCRLF;
903365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
903465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (newline < 0)
903565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
903665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd->nltype = NLTYPE_ANY;
903765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
903865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse
903965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
904065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd->nltype = NLTYPE_FIXED;
904165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (newline > 255)
904265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
904365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->nllen = 2;
904465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->nl[0] = (newline >> 8) & 255;
904565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->nl[1] = newline & 255;
904665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
904765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
904865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
904965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->nllen = 1;
905065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->nl[0] = newline;
905165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
905265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
905365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
905465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
905565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreferences to help in deciding whether (.*) can be treated as anchored or not.
905665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
905765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
905865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->top_backref = 0;
905965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->backref_map = 0;
906065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
906165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Reflect pattern for debugging output */
906265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
906365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("------------------------------------------------------------------\n"));
906465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG
906565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprint_puchar(stdout, (PCRE_PUCHAR)pattern);
906665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
906765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("\n"));
906865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
906965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Pretend to compile the pattern while actually just accumulating the length
907065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof memory required. This behaviour is triggered by passing a non-NULL final
907165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichargument to compile_regex(). We pass a block of workspace (cworkspace) for it
907265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto compile parts of the pattern into; the compiled code is discarded when it is
907365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichno longer needed, so hopefully this workspace will never overflow, though there
907465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis a test for its doing so. */
907565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
907665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->bracount = cd->final_bracount = 0;
907765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->names_found = 0;
907865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_entry_size = 0;
907965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_table = NULL;
908065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->dupnames = FALSE;
908165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->namedrefcount = 0;
908265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_code = cworkspace;
908365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = cworkspace;
908465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = cworkspace;
908565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->workspace_size = COMPILE_WORK_SIZE;
908665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->named_groups = named_groups;
908765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
908865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_pattern = (const pcre_uchar *)pattern;
908965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
909065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->req_varyopt = 0;
909165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->parens_depth = 0;
909265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->assert_depth = 0;
909365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->max_lookbehind = 0;
909465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->external_options = options;
909565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->open_caps = NULL;
909665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
909765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
909865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdon't need to look at the result of the function here. The initial options have
909965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbeen put into the cd block so that they can be changed if an option setting is
910065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfound within the regex right at the beginning. Bringing initial option settings
910165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoutside can help speed up starting point checks. */
910265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
910365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr += skipatstart;
910465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode = cworkspace;
910565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code = OP_BRA;
910665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
910765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
910865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
910965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd, &length);
911065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
911165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
911265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
911365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (int)(cd->hwm - cworkspace)));
911465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
911565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (length > MAX_PATTERN_SIZE)
911665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
911765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR20;
911865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
911965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
912065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
912165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If there are groups with duplicate names and there are also references by
912265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichname, we must allow for the possibility of named references to duplicated
912365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgroups. These require an extra data item each. */
912465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
912565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->dupnames && cd->namedrefcount > 0)
912665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);
912765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
912865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Compute the size of the data block for storing the compiled pattern. Integer
912965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoverflow should no longer be possible because nowadays we limit the maximum
913065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue of cd->names_found and cd->name_entry_size. */
913165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
913265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsize = sizeof(REAL_PCRE) +
913365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
913465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
913565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Get the memory. */
913665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
913765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre = (REAL_PCRE *)(PUBL(malloc))(size);
913865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (re == NULL)
913965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
914065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  errorcode = ERR21;
914165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  goto PCRE_EARLY_ERROR_RETURN;
914265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
914365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
914465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Put in the magic number, and save the sizes, initial options, internal
914565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichflags, and character table pointer. NULL is used for the default character
914665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtables. The nullpad field is at the end; it's there to help in the case when a
914765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregex compiled on a system with 4-byte pointers is run on another with 8-byte
914865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointers. */
914965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
915065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->magic_number = MAGIC_NUMBER;
915165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->size = (int)size;
915265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->options = cd->external_options;
915365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->flags = cd->external_flags;
915465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->limit_match = limit_match;
915565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->limit_recursion = limit_recursion;
915665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->first_char = 0;
915765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->req_char = 0;
915865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
915965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_entry_size = cd->name_entry_size;
916065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_count = cd->names_found;
916165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->ref_count = 0;
916265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->tables = (tables == PRIV(default_tables))? NULL : tables;
916365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->nullpad = NULL;
916465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32
916565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->dummy = 0;
916665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else
916765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->dummy1 = re->dummy2 = re->dummy3 = 0;
916865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
916965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
917065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The starting points of the name/number translation table and of the code are
917165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpassed around in the compile data block. The start/end pattern and initial
917265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoptions are already set from the pre-compile phase, as is the name_entry_size
917365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfield. Reset the bracket count and the names_found field. Also reset the hwm
917465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfield; this time it's used for remembering forward references to subpatterns.
917565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/
917665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
917765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->final_bracount = cd->bracount;  /* Save for checking forward references */
917865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->parens_depth = 0;
917965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->assert_depth = 0;
918065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->bracount = 0;
918165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->max_lookbehind = 0;
918265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_table = (pcre_uchar *)re + re->name_table_offset;
918365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcodestart = cd->name_table + re->name_entry_size * re->name_count;
918465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_code = codestart;
918565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = (pcre_uchar *)(cd->start_workspace);
918665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->req_varyopt = 0;
918765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->had_accept = FALSE;
918865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->had_pruneorskip = FALSE;
918965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->check_lookbehind = FALSE;
919065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->open_caps = NULL;
919165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
919265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If any named groups were found, create the name/number table from the list
919365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcreated in the first pass. */
919465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
919565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->names_found > 0)
919665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
919765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int i = cd->names_found;
919865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  named_group *ng = cd->named_groups;
919965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  cd->names_found = 0;
920065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (; i > 0; i--, ng++)
920165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    add_name(cd, ng->name, ng->length, ng->number);
920265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
920365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    (PUBL(free))((void *)cd->named_groups);
920465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
920565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
920665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up a starting, non-extracting bracket, then compile the expression. On
920765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicherror, errorcode will be set non-zero, so we don't need to look at the result
920865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof the function here. */
920965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
921065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr = (const pcre_uchar *)pattern + skipatstart;
921165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode = (pcre_uchar *)codestart;
921265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code = OP_BRA;
921365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
921465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL);
921565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->top_bracket = cd->bracount;
921665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->top_backref = cd->top_backref;
921765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->max_lookbehind = cd->max_lookbehind;
921865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->flags = cd->external_flags | PCRE_MODE;
921965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
922065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->had_accept)
922165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
922265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqchar = 0;              /* Must disable after (*ACCEPT) */
922365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  reqcharflags = REQ_NONE;
922465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
922565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
922665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If not reached end of pattern on success, there's an excess bracket. */
922765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
922865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22;
922965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
923065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Fill in the terminating state and check for disastrous overflow, but
923165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif debugging, leave the test till after things are printed out. */
923265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
923365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = OP_END;
923465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
923565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef PCRE_DEBUG
923665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (code - codestart > length) errorcode = ERR23;
923765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
923865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
923965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_VALGRIND
924065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the estimated length exceeds the really used length, mark the extra
924165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichallocated memory as unaddressable, so that any out-of-bound reads can be
924265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdetected. */
924365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichVALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
924465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
924565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
924665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Fill in any forward references that are required. There may be repeated
924765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreferences; optimize for them, as searching a large regex takes time. */
924865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
924965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->hwm > cd->start_workspace)
925065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
925165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int prev_recno = -1;
925265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const pcre_uchar *groupptr = NULL;
925365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  while (errorcode == 0 && cd->hwm > cd->start_workspace)
925465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
925565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int offset, recno;
925665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cd->hwm -= LINK_SIZE;
925765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    offset = GET(cd->hwm, 0);
925865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    recno = GET(codestart, offset);
925965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (recno != prev_recno)
926065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
926165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      groupptr = PRIV(find_bracket)(codestart, utf, recno);
926265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      prev_recno = recno;
926365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
926465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (groupptr == NULL) errorcode = ERR53;
926565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
926665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
926765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
926865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
926965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the workspace had to be expanded, free the new memory. Set the pointer to
927065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichNULL to indicate that forward references have been filled in. */
927165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
927265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size > COMPILE_WORK_SIZE)
927365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (PUBL(free))((void *)cd->start_workspace);
927465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = NULL;
927565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
927665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Give an error if there's a back reference to a non-existent capturing
927765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubpattern. */
927865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
927965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
928065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
928165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Unless disabled, check whether any single character iterators can be
928265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto-possessified. The function overwrites the appropriate opcode values, so
928365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe type of the pointer must be cast. NOTE: the intermediate variable "temp" is
928465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichused in this code because at least one compiler gives a warning about loss of
928565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"const" attribute if the cast (pcre_uchar *)codestart is used directly in the
928665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfunction call. */
928765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
928865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_NO_AUTO_POSSESS) == 0)
928965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
929065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *temp = (pcre_uchar *)codestart;
929165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  auto_possessify(temp, utf, cd);
929265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
929365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
929465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If there were any lookbehind assertions that contained OP_RECURSE
929565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(recursions or subroutine calls), a flag is set for them to be checked here,
929665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they may contain forward references. Actual recursions cannot be fixed
929765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength, but subroutine calls can. It is done like this so that those without
929865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
929965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexceptional ones forgo this. We scan the pattern to check that they are fixed
930065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength, and set their lengths. */
930165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
930265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->check_lookbehind)
930365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
930465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar *cc = (pcre_uchar *)codestart;
930565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
930665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Loop, searching for OP_REVERSE items, and process those that do not have
930765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  their length set. (Actually, it will also re-process any that have a length
930865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  of zero, but that is a pathological case, and it does no harm.) When we find
930965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  one, we temporarily terminate the branch it is in while we scan it. */
931065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
931165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1);
931265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       cc != NULL;
931365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich       cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1))
931465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
931565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (GET(cc, 1) == 0)
931665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
931765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int fixed_length;
931865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
931965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int end_op = *be;
932065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *be = OP_END;
932165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
932265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        cd);
932365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      *be = end_op;
932465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      DPRINTF(("fixed length = %d\n", fixed_length));
932565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (fixed_length < 0)
932665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
932765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        errorcode = (fixed_length == -2)? ERR36 :
932865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich                    (fixed_length == -4)? ERR70 : ERR25;
932965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        break;
933065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
933165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
933265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      PUT(cc, 1, fixed_length);
933365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
933465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    cc += 1 + LINK_SIZE;
933565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
933665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
933765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
933865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Failed to compile, or error while post-processing */
933965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
934065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode != 0)
934165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
934265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (PUBL(free))(re);
934365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  PCRE_EARLY_ERROR_RETURN:
934465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
934565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  PCRE_EARLY_ERROR_RETURN2:
934665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *errorptr = find_error_text(errorcode);
934765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (errorcodeptr != NULL) *errorcodeptr = errorcode;
934865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return NULL;
934965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
935065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
935165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the anchored option was not passed, set the flag if we can determine that
935265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe pattern is anchored by virtue of ^ characters or \A or anything else, such
935365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichas starting with non-atomic .* when DOTALL is set and there are no occurrences
935465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof *PRUNE or *SKIP.
935565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
935665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOtherwise, if we know what the first byte has to be, save it, because that
935765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichspeeds up unanchored matches no end. If not, see if we can set the
935865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_STARTLINE flag. This is helpful for multiline matches when all branches
935965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart with ^, and also when all branches start with non-atomic .* for
936065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnon-DOTALL matches when *PRUNE and *SKIP are not present. */
936165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
936265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->options & PCRE_ANCHORED) == 0)
936365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
936465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
936565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  else
936665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
936765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (firstcharflags < 0)
936865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE);
936965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (firstcharflags >= 0)   /* Remove caseless flag for non-caseable chars */
937065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
937165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
937265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      re->first_char = firstchar & 0xff;
937365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
937465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      re->first_char = firstchar & 0xffff;
937565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
937665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      re->first_char = firstchar;
937765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
937865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if ((firstcharflags & REQ_CASELESS) != 0)
937965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
938065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
938165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        /* We ignore non-ASCII first chars in 8 bit mode. */
938265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (utf)
938365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          {
938465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          if (re->first_char < 128)
938565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            {
938665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            if (cd->fcc[re->first_char] != re->first_char)
938765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich              re->flags |= PCRE_FCH_CASELESS;
938865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            }
938965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          else if (UCD_OTHERCASE(re->first_char) != re->first_char)
939065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            re->flags |= PCRE_FCH_CASELESS;
939165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          }
939265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        else
939365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
939465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (MAX_255(re->first_char)
939565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich            && cd->fcc[re->first_char] != re->first_char)
939665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          re->flags |= PCRE_FCH_CASELESS;
939765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
939865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
939965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      re->flags |= PCRE_FIRSTSET;
940065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
940165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
940265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;
940365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
940465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
940565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
940665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* For an anchored pattern, we use the "required byte" only if it follows a
940765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvariable length item in the regex. Remove the caseless flag for non-caseable
940865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbytes. */
940965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
941065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (reqcharflags >= 0 &&
941165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0))
941265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
941365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
941465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re->req_char = reqchar & 0xff;
941565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
941665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re->req_char = reqchar & 0xffff;
941765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
941865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re->req_char = reqchar;
941965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
942065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if ((reqcharflags & REQ_CASELESS) != 0)
942165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
942265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
942365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /* We ignore non-ASCII required chars in 8 bit mode. */
942465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (utf)
942565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
942665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      if (re->req_char < 128)
942765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
942865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if (cd->fcc[re->req_char] != re->req_char)
942965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          re->flags |= PCRE_RCH_CASELESS;
943065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
943165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      else if (UCD_OTHERCASE(re->req_char) != re->req_char)
943265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        re->flags |= PCRE_RCH_CASELESS;
943365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
943465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else
943565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
943665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char)
943765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      re->flags |= PCRE_RCH_CASELESS;
943865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
943965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
944065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re->flags |= PCRE_REQCHSET;
944165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
944265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
944365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Print out the compiled data if debugging is enabled. This is never the
944465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcase when building a production library. */
944565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
944665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG
944765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("Length = %d top_bracket = %d top_backref = %d\n",
944865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  length, re->top_bracket, re->top_backref);
944965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
945065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("Options=%08x\n", re->options);
945165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
945265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->flags & PCRE_FIRSTSET) != 0)
945365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
945465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar ch = re->first_char;
945565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char *caseless =
945665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
945765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
945865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else printf("First char = \\x%02x%s\n", ch, caseless);
945965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
946065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
946165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->flags & PCRE_REQCHSET) != 0)
946265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
946365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_uchar ch = re->req_char;
946465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  const char *caseless =
946565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
946665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
946765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else printf("Req char = \\x%02x%s\n", ch, caseless);
946865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
946965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
947065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
947165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_printint((pcre *)re, stdout, TRUE);
947265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
947365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre16_printint((pcre *)re, stdout, TRUE);
947465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
947565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre32_printint((pcre *)re, stdout, TRUE);
947665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
947765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
947865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This check is done here in the debugging case so that the code that
947965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwas compiled can be seen. */
948065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
948165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (code - codestart > length)
948265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
948365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (PUBL(free))(re);
948465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *errorptr = find_error_text(ERR23);
948565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  *erroroffset = ptr - (pcre_uchar *)pattern;
948665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (errorcodeptr != NULL) *errorcodeptr = ERR23;
948765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return NULL;
948865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
948965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif   /* PCRE_DEBUG */
949065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
949165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check for a pattern that can match an empty string, so that this information
949265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan be provided to applications. */
949365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
949465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo
949565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
949665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (could_be_empty_branch(codestart, code, utf, cd, NULL))
949765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
949865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    re->flags |= PCRE_MATCH_EMPTY;
949965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    break;
950065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
950165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  codestart += GET(codestart, 1);
950265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
950365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*codestart == OP_ALT);
950465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
950565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8
950665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre *)re;
950765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16
950865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre16 *)re;
950965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32
951065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre32 *)re;
951165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif
951265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
951365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
951465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* End of pcre_compile.c */
951565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9516