15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2006 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// --- SPONSORED LINK --------------------------------------------------
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If you want to use this library for regular expression matching,
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// you should use re2/re2.h, which provides a class RE2 that
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// mimics the PCRE interface provided by PCRE's C++ wrappers.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This header describes the low-level interface used to implement RE2
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and may change in backwards-incompatible ways from time to time.
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In contrast, RE2's interface will not.
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---------------------------------------------------------------------
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression library: parsing, execution, and manipulation
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of regular expressions.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Any operation that traverses the Regexp structures should be written
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expressions such as x++++++++++++++++++++... might cause recursive
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// traversals to overflow the stack.
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// It is the caller's responsibility to provide appropriate mutual exclusion
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// around manipulation of the regexps.  RE2 does this.
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// PARSING
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regexp::Parse parses regular expressions encoded in UTF-8.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The default syntax is POSIX extended regular expressions,
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// with the following changes:
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   1.  Backreferences (optional in POSIX EREs) are not supported.
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         (Supporting them precludes the use of DFA-based
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//          matching engines.)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   2.  Collating elements and collation classes are not supported.
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         (No one has needed or wanted them.)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The exact syntax accepted can be modified by passing flags to
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regexp::Parse.  In particular, many of the basic Perl additions
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// are available.  The flags are documented below (search for LikePerl).
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If parsed with the flag Regexp::Latin1, both the regular expression
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and the input to the matching routines are assumed to be encoded in
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Latin-1, not UTF-8.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXECUTION
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Once Regexp has parsed a regular expression, it provides methods
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to search text using that regular expression.  These methods are
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implemented via calling out to other regular expression libraries.
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (Let's call them the sublibraries.)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To call a sublibrary, Regexp does not simply prepare a
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// string version of the regular expression and hand it to the
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// sublibrary.  Instead, Regexp prepares, from its own parsed form, the
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// corresponding internal representation used by the sublibrary.
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This has the drawback of needing to know the internal representation
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used by the sublibrary, but it has two important benefits:
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   1. The syntax and meaning of regular expressions is guaranteed
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      to be that used by Regexp's parser, not the syntax expected
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      by the sublibrary.  Regexp might accept a restricted or
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      expanded syntax for regular expressions as compared with
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      the sublibrary.  As long as Regexp can translate from its
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      internal form into the sublibrary's, clients need not know
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      exactly which sublibrary they are using.
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   2. The sublibrary parsers are bypassed.  For whatever reason,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      sublibrary regular expression parsers often have security
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      problems.  For example, plan9grep's regular expression parser
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      has a buffer overflow in its handling of large character
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      classes, and PCRE's parser has had buffer overflow problems
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      in the past.  Security-team requires sandboxing of sublibrary
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      regular expression parsers.  Avoiding the sublibrary parsers
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//      avoids the sandbox.
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The execution methods we use now are provided by the compiled form,
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog, described in prog.h
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// MANIPULATION
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Unlike other regular expression libraries, Regexp makes its parsed
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// form accessible to clients, so that client code can analyze the
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// parsed regular expressions.
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef RE2_REGEXP_H__
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define RE2_REGEXP_H__
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/stringpiece.h"
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)enum RegexpOp {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches no strings.
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpNoMatch = 1,
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches empty string.
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpEmptyMatch,
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches rune_.
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpLiteral,
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches runes_.
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpLiteralString,
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches concatenation of sub_[0..nsub-1].
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpConcat,
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches union of sub_[0..nsub-1].
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpAlternate,
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches sub_[0] zero or more times.
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpStar,
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches sub_[0] one or more times.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpPlus,
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches sub_[0] zero or one times.
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpQuest,
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches sub_[0] at least min_ times, at most max_ times.
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // max_ == -1 means no upper limit.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpRepeat,
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parenthesized (capturing) subexpression.  Index is cap_.
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Optionally, capturing name is name_.
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpCapture,
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches any character.
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpAnyChar,
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches any byte [sic].
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpAnyByte,
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches empty string at beginning of line.
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBeginLine,
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches empty string at end of line.
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpEndLine,
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches word boundary "\b".
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpWordBoundary,
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches not-a-word boundary "\B".
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpNoWordBoundary,
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches empty string at beginning of text.
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBeginText,
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches empty string at end of text.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpEndText,
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Matches character class given by cc_.
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpCharClass,
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Forces match of entire expression right now,
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // with match ID match_id_ (used by RE2::Set).
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpHaveMatch,
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kMaxRegexpOp = kRegexpHaveMatch,
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Keep in sync with string list in regexp.cc
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)enum RegexpStatusCode {
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // No error
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpSuccess = 0,
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unexpected error
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpInternalError,
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parse errors
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadEscape,          // bad escape sequence
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadCharClass,       // bad character class
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadCharRange,       // bad character class range
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpMissingBracket,     // missing closing ]
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpMissingParen,       // missing closing )
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpTrailingBackslash,  // at end of regexp
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpRepeatSize,         // bad repetition argument
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpRepeatOp,           // bad repetition operator
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadPerlOp,          // bad perl operator
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadUTF8,            // invalid UTF-8 in regexp
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  kRegexpBadNamedCapture,    // bad named capture
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Error status for certain operations.
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class RegexpStatus {
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~RegexpStatus() { delete tmp_; }
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_code(enum RegexpStatusCode code) { code_ = code; }
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum RegexpStatusCode code() const { return code_; }
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const StringPiece& error_arg() const { return error_arg_; }
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ok() const { return code() == kRegexpSuccess; }
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Copies state from status.
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Copy(const RegexpStatus& status);
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns text equivalent of code, e.g.:
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   "Bad character class"
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static string CodeText(enum RegexpStatusCode code);
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns text describing error, e.g.:
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   "Bad character class: [z-a]"
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string Text() const;
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum RegexpStatusCode code_;  // Kind of error
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece error_arg_;       // Piece of regexp containing syntax error.
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string* tmp_;                 // Temporary storage, possibly where error_arg_ is.
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(RegexpStatus);
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Walker to implement Simplify.
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SimplifyWalker;
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Compiled form; see prog.h
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Prog;
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct RuneRange {
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RuneRange() : lo(0), hi(0) { }
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RuneRange(int l, int h) : lo(l), hi(h) { }
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Rune lo;
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Rune hi;
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Less-than on RuneRanges treats a == b if they overlap at all.
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This lets us look in a set to find the range covering a particular Rune.
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct RuneRangeLess {
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const RuneRange& a, const RuneRange& b) const {
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return a.hi < b.lo;
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class CharClassBuilder;
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class CharClass {
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Delete();
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef RuneRange* iterator;
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  iterator begin() { return ranges_; }
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  iterator end() { return ranges_ + nranges_; }
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int size() { return nrunes_; }
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool empty() { return nrunes_ == 0; }
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool full() { return nrunes_ == Runemax+1; }
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool FoldsASCII() { return folds_ascii_; }
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Contains(Rune r);
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClass* Negate();
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClass();  // not implemented
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~CharClass();  // not implemented
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static CharClass* New(int maxranges);
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend class CharClassBuilder;
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool folds_ascii_;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nrunes_;
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RuneRange *ranges_;
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nranges_;
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(CharClass);
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Regexp {
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Flags for parsing.  Can be ORed together.
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum ParseFlags {
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NoParseFlags = 0,
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FoldCase     = 1<<0,   // Fold case during matching (case-insensitive).
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Literal      = 1<<1,   // Treat s as literal string instead of a regexp.
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClassNL      = 1<<2,   // Allow char classes like [^a-z] and \D and \s
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           // and [[:space:]] to match newline.
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DotNL        = 1<<3,   // Allow . to match newline.
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MatchNL      = ClassNL | DotNL,
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OneLine      = 1<<4,   // Treat ^ and $ as only matching at beginning and
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           // end of text, not around embedded newlines.
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           // (Perl's default)
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Latin1       = 1<<5,   // Regexp and text are in Latin1, not UTF-8.
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NonGreedy    = 1<<6,   // Repetition operators are non-greedy by default.
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PerlClasses  = 1<<7,   // Allow Perl character classes like \d.
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PerlB        = 1<<8,   // Allow Perl's \b and \B.
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PerlX        = 1<<9,   // Perl extensions:
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   non-capturing parens - (?: )
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   non-greedy operators - *? +? ?? {}?
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   flag edits - (?i) (?-i) (?i: )
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //     i - FoldCase
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //     m - !OneLine
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //     s - DotNL
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //     U - NonGreedy
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   line ends: \A \z
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   \Q and \E to disable/enable metacharacters
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   (?P<name>expr) for named captures
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   \C to match any single byte
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   and \P{Han} for its negation.
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NeverNL      = 1<<11,  // Never match NL, even if the regexp mentions
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           //   it explicitly.
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NeverCapture = 1<<12,  // Parse all parens as non-capturing.
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // As close to Perl as we can get.
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LikePerl     = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   UnicodeGroups,
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Internal use only.
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WasDollar    = 1<<15,  // on kRegexpEndText: was $ in regexp text
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Get.  No set, Regexps are logically immutable once created.
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RegexpOp op() { return static_cast<RegexpOp>(op_); }
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nsub() { return nsub_; }
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool simple() { return simple_; }
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int Ref();  // For testing.
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp** sub() {
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(nsub_ <= 1)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return &subone_;
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return submany_;
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Increments reference count, returns object as convenience.
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* Incref();
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Decrements reference count and deletes this object if count reaches 0.
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Decref();
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parses string s to produce regular expression, returned.
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Caller must release return value with re->Decref().
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // On failure, sets *status (if status != NULL) and returns NULL.
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Parse(const StringPiece& s, ParseFlags flags,
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       RegexpStatus* status);
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns a _new_ simplified version of the current regexp.
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Does not edit the current regexp.
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Caller must release return value with re->Decref().
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Simplified means that counted repetition has been rewritten
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // into simpler terms and all Perl/POSIX features have been
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // removed.  The result will capture exactly the same
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // subexpressions the original did, unless formatted with ToString.
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* Simplify();
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend class SimplifyWalker;
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parses the regexp src and then simplifies it and sets *dst to the
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string representation of the simplified form.  Returns true on success.
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns false and sets *status (if status != NULL) on parse error.
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             string* dst,
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             RegexpStatus* status);
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the number of capturing groups in the regexp.
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int NumCaptures();
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend class NumCapturesWalker;
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns a map from names to capturing group indices,
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // or NULL if the regexp contains no named capture groups.
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The caller is responsible for deleting the map.
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  map<string, int>* NamedCaptures();
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns a map from capturing group indices to capturing group
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // names or NULL if the regexp contains no named capture groups. The
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // caller is responsible for deleting the map.
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  map<int, string>* CaptureNames();
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns a string representation of the current regexp,
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // using as few parentheses as possible.
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string ToString();
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Convenience functions.  They consume the passed reference,
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so in many cases you should use, e.g., Plus(re->Incref(), flags).
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // They do not consume allocated arrays like subs or runes.
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Plus(Regexp* sub, ParseFlags flags);
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Star(Regexp* sub, ParseFlags flags);
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Quest(Regexp* sub, ParseFlags flags);
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* NewLiteral(Rune rune, ParseFlags flags);
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* HaveMatch(int match_id, ParseFlags flags);
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Like Alternate but does not factor out common prefixes.
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Debugging function.  Returns string format for regexp
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that makes structure clear.  Does NOT use regexp syntax.
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string Dump();
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper traversal class, defined fully in walker-inl.h.
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  template<typename T> class Walker;
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Compile to Prog.  See prog.h
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reverse prog expects to be run over text backward.
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Construction and execution of prog will
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // stay within approximately max_mem bytes of memory.
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If max_mem <= 0, a reasonable default is used.
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* CompileToProg(int64 max_mem);
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* CompileToReverseProg(int64 max_mem);
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Whether to expect this library to find exactly the same answer as PCRE
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // when running this regexp.  Most regexps do mimic PCRE exactly, but a few
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // obscure cases behave differently.  Technically this is more a property
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // of the Prog than the Regexp, but the computation is much easier to do
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // on the Regexp.  See mimics_pcre.cc for the exact conditions.
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool MimicsPCRE();
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Benchmarking function.
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void NullWalk();
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Whether every match of this regexp must be anchored and
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // begin with a non-empty fixed string (perhaps after ASCII
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // case-folding).  If so, returns the prefix and the sub-regexp that
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // follows it.
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RequiredPrefix(string* prefix, bool *foldcase, Regexp** suffix);
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructor allocates vectors as appropriate for operator.
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit Regexp(RegexpOp op, ParseFlags parse_flags);
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Use Decref() instead of delete to release Regexps.
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is private to catch deletes at compile time.
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~Regexp();
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Destroy();
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool QuickDestroy();
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helpers for Parse.  Listed here so they can edit Regexps.
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class ParseState;
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend class ParseState;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             RegexpStatus* status);
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper for testing [sic].
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Computes whether Regexp is already simple.
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ComputeSimple();
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructor that generates a concatenation or alternation,
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // enforcing the limit on the number of subexpressions for
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a particular Regexp.
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   ParseFlags flags, bool can_factor);
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the leading string that re starts with.
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The returned Rune* points into a piece of re,
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so it must not be used after the caller calls re->Decref().
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Removes the first n leading runes from the beginning of re.
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Edits re in place.
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void RemoveLeadingString(Regexp* re, int n);
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the leading regexp in re's top-level concatenation.
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The returned Regexp* points at re or a sub-expression of re,
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so it must not be used after the caller calls re->Decref().
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* LeadingRegexp(Regexp* re);
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Removes LeadingRegexp(re) from re and returns the remainder.
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Might edit re in place.
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Regexp* RemoveLeadingRegexp(Regexp* re);
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Simplifies an alternation of literal strings by factoring out
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // common prefixes.
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static int FactorAlternationRecursive(Regexp** sub, int nsub,
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        ParseFlags flags, int maxdepth);
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Is a == b?  Only efficient on regexps that have not been through
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Simplify yet - the expansion of a kRegexpRepeat will make this
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // take a long time.  Do not call on such regexps, hence private.
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static bool Equal(Regexp* a, Regexp* b);
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Allocate space for n sub-regexps.
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AllocSub(int n) {
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (n < 0 || static_cast<uint16>(n) != n)
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(FATAL) << "Cannot AllocSub " << n;
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (n > 1)
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      submany_ = new Regexp*[n];
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nsub_ = n;
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add Rune to LiteralString
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddRuneToString(Rune r);
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Swaps this with that, in place.
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Swap(Regexp *that);
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Operator.  See description of operators above.
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // uint8 instead of RegexpOp to control space usage.
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint8 op_;
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Is this regexp structure already simple
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (has it been returned by Simplify)?
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // uint8 instead of bool to control space usage.
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint8 simple_;
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Flags saved from parsing and used during execution.
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (Only FoldCase is used.)
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // uint16 instead of ParseFlags to control space usage.
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 parse_flags_;
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reference count.  Exists so that SimplifyRegexp can build
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // regexp structures that are dags rather than trees to avoid
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // exponential blowup in space requirements.
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // uint16 to control space usage.
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The standard regexp routines will never generate a
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ref greater than the maximum repeat count (100),
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // but even so, Incref and Decref consult an overflow map
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // when ref_ reaches kMaxRef.
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 ref_;
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const uint16 kMaxRef = 0xffff;
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Subexpressions.
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // uint16 to control space usage.
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Concat and Alternate handle larger numbers of subexpressions
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // by building concatenation or alternation trees.
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Other routines should call Concat or Alternate instead of
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // filling in sub() by hand.
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 nsub_;
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const uint16 kMaxNsub = 0xffff;
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  union {
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Regexp** submany_;  // if nsub_ > 1
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Regexp* subone_;  // if nsub_ == 1
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Extra space for parse and teardown stacks.
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* down_;
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Arguments to operator.  See description of operators above.
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  union {
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    struct {  // Repeat
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int max_;
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int min_;
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    };
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    struct {  // Capture
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int cap_;
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string* name_;
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    };
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    struct {  // LiteralString
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int nrunes_;
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Rune* runes_;
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    };
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    struct {  // CharClass
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // These two could be in separate union members,
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // but it wouldn't save any space (there are other two-word structs)
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // and keeping them separate avoids confusion during parsing.
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CharClass* cc_;
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CharClassBuilder* ccb_;
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    };
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Rune rune_;  // Literal
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int match_id_;  // HaveMatch
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    void *the_union_[2];  // as big as any other element, for memset
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(Regexp);
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Character class set: contains non-overlapping, non-abutting RuneRanges.
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef set<RuneRange, RuneRangeLess> RuneRangeSet;
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class CharClassBuilder {
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClassBuilder();
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef RuneRangeSet::iterator iterator;
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  iterator begin() { return ranges_.begin(); }
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  iterator end() { return ranges_.end(); }
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int size() { return nrunes_; }
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool empty() { return nrunes_ == 0; }
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool full() { return nrunes_ == Runemax+1; }
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Contains(Rune r);
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool FoldsASCII();
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool AddRange(Rune lo, Rune hi);  // returns whether class changed
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClassBuilder* Copy();
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddCharClass(CharClassBuilder* cc);
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Negate();
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RemoveAbove(Rune r);
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CharClass* GetCharClass();
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const uint32 AlphaMask = (1<<26) - 1;
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 upper_;  // bitmap of A-Z
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 lower_;  // bitmap of a-z
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nrunes_;
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RuneRangeSet ranges_;
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(CharClassBuilder);
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tell g++ that bitwise ops on ParseFlags produce ParseFlags.
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b)
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return static_cast<Regexp::ParseFlags>(static_cast<int>(a) | static_cast<int>(b));
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b)
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return static_cast<Regexp::ParseFlags>(static_cast<int>(a) ^ static_cast<int>(b));
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b)
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return static_cast<Regexp::ParseFlags>(static_cast<int>(a) & static_cast<int>(b));
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return static_cast<Regexp::ParseFlags>(~static_cast<int>(a));
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // RE2_REGEXP_H__
634