1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// regexcmp.h 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 41b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// Copyright (C) 2002-2015, International Business Machines Corporation and others. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This file contains declarations for the class RegexCompile 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This class is internal to the regular expression implementation. 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// For the public Regular Expression API, see the file "unicode/regex.h" 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef RBBISCAN_H 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define RBBISCAN_H 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uobject.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parseerr.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uhash.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 25f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "uvectr32.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// class RegexCompile Contains the regular expression compiler. 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct RegexTableEl; 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RegexPattern; 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusclass U_I18N_API RegexCompile : public UMemory { 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru enum { 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru kStackSize = 100 // The size of the state stack for 46c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; // pattern parsing. Corresponds roughly 47c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // to the depth of parentheses nesting 48c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that is allowed in the rules. 49c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru struct RegexPatternChar { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 fChar; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fQuoted; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexCompile(RegexPattern *rp, UErrorCode &e); 56c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void compile(const UnicodeString &pat, UParseError &pp, UErrorCode &e); 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void compile(UText *pat, UParseError &pp, UErrorCode &e); 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RegexCompile(); 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void nextChar(RegexPatternChar &c); // Get the next char from the input stream. 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static void cleanup(); // Memory cleanup 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Categories of parentheses in pattern. 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The category is saved in the compile-time parentheses stack frame, and 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // determines the code to be generated when the matching close ) is encountered. 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum EParenClass { 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru plain = -1, // No special handling 74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capturing = -2, 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru atomic = -3, 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lookAhead = -4, 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru negLookAhead = -5, 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags = -6, 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lookBehind = -7, 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lookBehindN = -8 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doParseActions(int32_t a); 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void error(UErrorCode e); // error reporting convenience function. 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 nextCharLL(); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 peekCharLL(); 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *scanProp(); 92c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *scanPosixProp(); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void handleCloseParen(); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t blockTopLoc(UBool reserve); // Locate a position in the compiled pattern 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at the top of the just completed block 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or operation, and optionally ensure that 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // there is space to add an opcode there. 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void compileSet(UnicodeSet *theSet); // Generate the compiled pattern for 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a reference to a UnicodeSet. 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void compileInterval(int32_t InitOp, // Generate the code for a {min,max} quantifier. 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t LoopOp); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compileInlineInterval(); // Generate inline code for a {min,max} quantifier 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void literalChar(UChar32 c); // Compile a literal char 10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters. 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void insertOp(int32_t where); // Open up a slot for a new op in the 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // generated code at the specified location. 1071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void appendOp(int32_t op); // Append a new op to the compiled pattern. 1081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void appendOp(int32_t type, int32_t val); // Build & append a new op to the compiled pattern. 1091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t buildOp(int32_t type, int32_t val); // Construct a new pcode instruction. 1101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t allocateData(int32_t size); // Allocate space in the matcher data area. 1111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Return index of the newly allocated data. 1121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame. 1131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Return offset index in the frame. 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t minMatchLength(int32_t start, 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t end); 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxMatchLength(int32_t start, 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t end); 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void matchStartType(); 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void stripNOPs(); 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void setEval(int32_t op); 122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void setPushOp(int32_t op); 123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 scanNamedChar(); 124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *createSetForProperty(const UnicodeString &propName, UBool negated); 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliuspublic: // Public for testing only. 127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius static void U_EXPORT2 findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterChars); 128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusprivate: 129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *fStatus; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *fRXPat; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError *fParseErr; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Data associated with low level character scanning 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fScanIndex; // Index of current character being processed 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the rule input string. 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fQuoteMode; // Scan is in a \Q...\E quoted region 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fInBackslashQuote; // Scan is between a '\' and the following char. 142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fEOLComments; // When scan is just after '(?', inhibit #... to 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // end of line comments, in favor of (?#...) comments. 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fLineNum; // Line number in input file. 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fCharNum; // Char position within the line. 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 fLastChar; // Previous char, needed to count CR-LF 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // as a single line, not two. 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 fPeekChar; // Saved char, if we've scanned ahead. 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPatternChar fC; // Current char for parse state machine 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processing. 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Data for the state machine that parses the regular expression. 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexTableEl **fStateTable; // State Transition Table for regex Rule 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // parsing. index by p[state][char-class] 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fStack[kStackSize]; // State stack, holds state pushes 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fStackPtr; // and pops as specified in the state 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transition rules. 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Data associated with the generation of the pcode for the match engine 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fModeFlags; // Match Flags. (Case Insensitive, etc.) 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Always has high bit (31) set so that flag values 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // on the paren stack are distinguished from relocatable 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pcode addresses. 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fNewModeFlags; // New flags, while compiling (?i, holds state 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // until last flag is scanned. 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fSetModeFlag; // true for (?ismx, false for (?-ismx 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString fLiteralChars; // Literal chars or strings from the pattern are accumulated here. 17683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Once completed, meaning that some non-literal pattern 17783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // construct is encountered, the appropriate opcodes 17883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // to match the literal will be generated, and this 17983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // string will be cleared. 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fPatternLength; // Length of the input pattern string. 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 fParenStack; // parentheses stack. Each frame consists of 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the positions of compiled pattern operations 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // needing fixup, followed by negative value. The 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first entry in each frame is the position of the 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spot reserved for use when a quantifier 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // needs to add a SAVE at the start of a (block) 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The negative value (-1, -2,...) indicates 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the kind of paren that opened the frame. Some 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // need special handling on close. 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fMatchOpenParen; // The position in the compiled pattern 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of the slot reserved for a state save 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at the start of the most recently processed 1971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // parenthesized block. Updated when processing 1981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // a close to the location for the corresponding open. 1991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fMatchCloseParen; // The position in the pattern of the first 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // location after the most recently processed 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // parenthesized block. 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fIntervalLow; // {lower, upper} interval quantifier values. 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fIntervalUpper; // Placed here temporarily, when pattern is 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initially scanned. Each new interval 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // encountered overwrites these values. 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // -1 for the upper interval value means none 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // was specified (unlimited occurences.) 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fNameStartPos; // Starting position of a \N{NAME} name in a 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pattern, valid while remainder of name is 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // scanned. 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UStack fSetStack; // Stack of UnicodeSets, used while evaluating 216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (at compile time) set expressions within 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the pattern. 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UStack fSetOpStack; // Stack of pending set operators (&&, --, union) 219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 fLastSetLiteral; // The last single code point added to a set. 221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // needed when "-y" is scanned, and we need 222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // to turn "x-y" into a range. 2231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 2241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString *fCaptureName; // Named Capture, the group name is built up 2251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // in this string while being scanned. 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Constant values to be pushed onto fSetOpStack while scanning & evalueating [set expressions] 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// The high 16 bits are the operator precedence, and the low 16 are a code for the operation itself. 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruenum SetOperations { 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setStart = 0 << 16 | 1, 233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setEnd = 1 << 16 | 2, 234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setNegation = 2 << 16 | 3, 235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setCaseClose = 2 << 16 | 9, 236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setDifference2 = 3 << 16 | 4, // '--' set difference operator 237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setIntersection2 = 3 << 16 | 5, // '&&' set intersection operator 238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setUnion = 4 << 16 | 6, // implicit union of adjacent items 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setDifference1 = 4 << 16 | 7, // '-', single dash difference op, for compatibility with old UnicodeSet. 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setIntersection1 = 4 << 16 | 8 // '&', single amp intersection op, for compatibility with old UnicodeSet. 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // RBBISCAN_H 246