15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression interface RE2.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Originally the PCRE C++ wrapper, but adapted to use
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the new automata-based regular expression engines.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/re2.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdio.h>
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef WIN32
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtoll _strtoi64
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtoull _strtoui64
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtof strtod
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <pthread.h>
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <errno.h>
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/flags.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h"
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_bool(trace_re2, false, "trace RE2 execution");
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Maximum number of args we can set
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kMaxArgs = 16;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kVecSize = 1+kMaxArgs;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch = {};
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch = {};
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {};
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {};
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define kDefaultMaxMem (8<<20)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::Options::Options()
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  :  encoding_(EncodingUTF8),
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     posix_syntax_(false),
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     longest_match_(false),
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     log_errors_(true),
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     max_mem_(kDefaultMaxMem),
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     literal_(false),
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     never_nl_(false),
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     never_capture_(false),
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     case_sensitive_(true),
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     perl_classes_(false),
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     word_boundary_(false),
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     one_line_(false) {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::Options::Options(RE2::CannedOptions opt)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    posix_syntax_(opt == RE2::POSIX_SYNTAX),
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    longest_match_(opt == RE2::POSIX_SYNTAX),
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    log_errors_(opt != RE2::Quiet),
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    max_mem_(kDefaultMaxMem),
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    literal_(false),
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    never_nl_(false),
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    never_capture_(false),
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case_sensitive_(true),
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    perl_classes_(false),
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    word_boundary_(false),
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    one_line_(false) {
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static empty things for use as const references.
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To avoid global constructors, initialized on demand.
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)GLOBAL_MUTEX(empty_mutex);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const string *empty_string;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const map<string, int> *empty_named_groups;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const map<int, string> *empty_group_names;
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void InitEmpty() {
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GLOBAL_MUTEX_LOCK(empty_mutex);
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (empty_string == NULL) {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    empty_string = new string;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    empty_named_groups = new map<string, int>;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    empty_group_names = new map<int, string>;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GLOBAL_MUTEX_UNLOCK(empty_mutex);
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Converts from Regexp error code to RE2 error code.
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Maybe some day they will diverge.  In any event, this
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// hides the existence of Regexp from RE2 users.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (code) {
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpSuccess:
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::NoError;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpInternalError:
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorInternal;
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadEscape:
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadEscape;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadCharClass:
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadCharClass;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadCharRange:
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadCharRange;
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpMissingBracket:
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorMissingBracket;
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpMissingParen:
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorMissingParen;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpTrailingBackslash:
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorTrailingBackslash;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpRepeatArgument:
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorRepeatArgument;
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpRepeatSize:
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorRepeatSize;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpRepeatOp:
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorRepeatOp;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadPerlOp:
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadPerlOp;
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadUTF8:
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadUTF8;
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case re2::kRegexpBadNamedCapture:
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return RE2::ErrorBadNamedCapture;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return RE2::ErrorInternal;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static string trunc(const StringPiece& pattern) {
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (pattern.size() < 100)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return pattern.as_string();
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return pattern.substr(0, 100).as_string() + "...";
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const char* pattern) {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Init(pattern, DefaultOptions);
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const string& pattern) {
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Init(pattern, DefaultOptions);
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const StringPiece& pattern) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Init(pattern, DefaultOptions);
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const StringPiece& pattern, const Options& options) {
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Init(pattern, options);
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::Options::ParseFlags() const {
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int flags = Regexp::ClassNL;
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (encoding()) {
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (log_errors())
1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        LOG(ERROR) << "Unknown encoding " << encoding();
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case RE2::Options::EncodingUTF8:
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case RE2::Options::EncodingLatin1:
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      flags |= Regexp::Latin1;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!posix_syntax())
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::LikePerl;
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (literal())
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::Literal;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (never_nl())
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::NeverNL;
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (never_capture())
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::NeverCapture;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!case_sensitive())
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::FoldCase;
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (perl_classes())
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::PerlClasses;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (word_boundary())
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::PerlB;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (one_line())
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    flags |= Regexp::OneLine;
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return flags;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void RE2::Init(const StringPiece& pattern, const Options& options) {
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  mutex_ = new Mutex;
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pattern_ = pattern.as_string();
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  options_.Copy(options);
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  InitEmpty();
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  error_ = empty_string;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  error_code_ = NoError;
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  suffix_regexp_ = NULL;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entire_regexp_ = NULL;
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prog_ = NULL;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rprog_ = NULL;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  named_groups_ = NULL;
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  group_names_ = NULL;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  num_captures_ = -1;
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RegexpStatus status;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  entire_regexp_ = Regexp::Parse(
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pattern_,
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    &status);
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (entire_regexp_ == NULL) {
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (error_ == empty_string)
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      error_ = new string(status.Text());
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options_.log_errors()) {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 << status.Text();
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    error_arg_ = status.error_arg().as_string();
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    error_code_ = RegexpErrorToRE2(status.code());
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefix_.clear();
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefix_foldcase_ = false;
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  re2::Regexp* suffix;
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    suffix_regexp_ = suffix;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    suffix_regexp_ = entire_regexp_->Incref();
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Two thirds of the memory goes to the forward Prog,
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // one third to the reverse prog, because the forward
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Prog has two DFAs but the reverse prog has one.
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_ == NULL) {
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options_.log_errors())
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    error_ = new string("pattern too large - compile failed");
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    error_code_ = RE2::ErrorPatternTooLarge;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Could delay this until the first match call that
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // cares about submatch information, but the one-pass
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // machine's memory gets cut from the DFA memory budget,
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and that is harder to do if the DFA has already
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // been built.
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  is_one_pass_ = prog_->IsOnePass();
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns rprog_, computing it if needed.
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)re2::Prog* RE2::ReverseProg() const {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MutexLock l(mutex_);
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (rprog_ == NULL && error_ == empty_string) {
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (rprog_ == NULL) {
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (options_.log_errors())
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      error_ = new string("pattern too large - reverse compile failed");
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      error_code_ = RE2::ErrorPatternTooLarge;
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return NULL;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return rprog_;
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::~RE2() {
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (suffix_regexp_)
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    suffix_regexp_->Decref();
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (entire_regexp_)
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    entire_regexp_->Decref();
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete mutex_;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete prog_;
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete rprog_;
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (error_ != empty_string)
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete error_;
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (named_groups_ != NULL && named_groups_ != empty_named_groups)
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete named_groups_;
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (group_names_ != NULL &&  group_names_ != empty_group_names)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete group_names_;
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::ProgramSize() const {
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_ == NULL)
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return prog_->size();
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns named_groups_, computing it if needed.
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const map<string, int>&  RE2::NamedCapturingGroups() const {
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MutexLock l(mutex_);
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ok())
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return *empty_named_groups;
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (named_groups_ == NULL) {
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    named_groups_ = suffix_regexp_->NamedCaptures();
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (named_groups_ == NULL)
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      named_groups_ = empty_named_groups;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return *named_groups_;
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns group_names_, computing it if needed.
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const map<int, string>&  RE2::CapturingGroupNames() const {
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MutexLock l(mutex_);
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ok())
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return *empty_group_names;
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (group_names_ == NULL) {
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    group_names_ = suffix_regexp_->CaptureNames();
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (group_names_ == NULL)
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      group_names_ = empty_group_names;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return *group_names_;
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Convenience interfaces *****/
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     const Arg* const args[], int n) {
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        const Arg* const args[], int n) {
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return re.DoMatch(text, UNANCHORED, NULL, args, n);
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::ConsumeN(StringPiece* input, const RE2& re,
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const Arg* const args[], int n) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int consumed;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->remove_prefix(consumed);
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          const Arg* const args[], int n) {
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int consumed;
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->remove_prefix(consumed);
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns the maximum submatch needed for the rewrite to be done by Replace().
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// E.g. if rewrite == "foo \\2,\\1", returns 2.
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::MaxSubmatch(const StringPiece& rewrite) {
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int max = 0;
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const char *s = rewrite.data(), *end = s + rewrite.size();
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       s < end; s++) {
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*s == '\\') {
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s++;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int c = (s < end) ? *s : -1;
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (isdigit(c)) {
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int n = (c - '0');
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (n > max)
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          max = n;
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return max;
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Replace(string *str,
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const RE2& re,
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const StringPiece& rewrite) {
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece vec[kVecSize];
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nvec = 1 + MaxSubmatch(rewrite);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nvec > arraysize(vec))
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s;
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!re.Rewrite(&s, rewrite, vec, nvec))
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert(vec[0].begin() >= str->data());
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert(vec[0].end() <= str->data()+str->size());
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str->replace(vec[0].data() - str->data(), vec[0].size(), s);
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::GlobalReplace(string *str,
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const RE2& re,
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const StringPiece& rewrite) {
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece vec[kVecSize];
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nvec = 1 + MaxSubmatch(rewrite);
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nvec > arraysize(vec))
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* p = str->data();
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* ep = p + str->size();
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* lastend = NULL;
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string out;
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int count = 0;
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (p <= ep) {
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (p < vec[0].begin())
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      out.append(p, vec[0].begin() - p);
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (vec[0].begin() == lastend && vec[0].size() == 0) {
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Disallow empty match at end of last match: skip ahead.
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (p < ep)
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        out.append(p, 1);
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      p++;
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re.Rewrite(&out, rewrite, vec, nvec);
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    p = vec[0].end();
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lastend = p;
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    count++;
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (count == 0)
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p < ep)
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    out.append(p, ep - p);
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  swap(out, *str);
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return count;
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Extract(const StringPiece &text,
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const RE2& re,
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const StringPiece &rewrite,
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 string *out) {
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece vec[kVecSize];
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nvec = 1 + MaxSubmatch(rewrite);
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nvec > arraysize(vec))
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  out->clear();
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return re.Rewrite(out, rewrite, vec, nvec);
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string RE2::QuoteMeta(const StringPiece& unquoted) {
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string result;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  result.reserve(unquoted.size() << 1);
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Escape any ascii character not in [A-Za-z_0-9].
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note that it's legal to escape a character even if it has no
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // special meaning in a regular expression -- so this function does
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that.  (This also makes it identical to the perl function of the
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // same name except for the null-character special case;
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // see `perldoc -f quotemeta`.)
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int ii = 0; ii < unquoted.length(); ++ii) {
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Note that using 'isalnum' here raises the benchmark time from
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // 32ns to 58ns:
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        unquoted[ii] != '_' &&
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // If this is the part of a UTF8 or Latin1 character, we need
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // to copy this byte without escaping.  Experimentally this is
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // what works correctly with the regexp library.
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        !(unquoted[ii] & 128)) {
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (unquoted[ii] == '\0') {  // Special handling for null chars.
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Note that this special handling is not strictly required for RE2,
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // but this quoting is required for other regexp libraries such as
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // PCRE.
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Can't use "\\0" since the next character might be a digit.
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        result += "\\x00";
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      result += '\\';
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    result += unquoted[ii];
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return result;
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_ == NULL)
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int n = prefix_.size();
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n > maxlen)
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    n = maxlen;
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Determine initial min max from prefix_ literal.
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string pmin, pmax;
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pmin = prefix_.substr(0, n);
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pmax = prefix_.substr(0, n);
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prefix_foldcase_) {
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // prefix is ASCII lowercase; change pmin to uppercase.
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < n; i++) {
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ('a' <= pmin[i] && pmin[i] <= 'z')
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        pmin[i] += 'A' - 'a';
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add to prefix min max using PossibleMatchRange on regexp.
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string dmin, dmax;
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  maxlen -= n;
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pmin += dmin;
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pmax += dmax;
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (pmax.size() > 0) {
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // prog_->PossibleMatchRange has failed us,
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // but we still have useful information from prefix_.
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Round up pmax to allow any possible suffix.
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pmax = PrefixSuccessor(pmax);
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Nothing useful.
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *min = "";
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *max = "";
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *min = pmin;
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *max = pmax;
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Avoid possible locale nonsense in standard strcasecmp.
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The string a is known to be all lowercase.
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int ascii_strcasecmp(const char* a, const char* b, int len) {
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *ae = a + len;
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (; a < ae; a++, b++) {
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 x = *a;
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 y = *b;
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ('A' <= y && y <= 'Z')
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      y += 'a' - 'A';
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (x != y)
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return x - y;
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 0;
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Actual matching and rewriting code *****/
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Match(const StringPiece& text,
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                int startpos,
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                int endpos,
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                Anchor re_anchor,
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                StringPiece* submatch,
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                int nsubmatch) const {
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ok() || suffix_regexp_ == NULL) {
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options_.log_errors())
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(ERROR) << "Invalid RE2: " << *error_;
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (startpos < 0 || startpos > endpos || endpos > text.size()) {
5562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (options_.log_errors())
5572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece subtext = text;
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  subtext.remove_prefix(startpos);
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  subtext.remove_suffix(text.size() - endpos);
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Use DFAs to find exact location of match, filter out non-matches.
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Don't ask for the location if we won't use it.
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // SearchDFA can do extra optimizations in that case.
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece match;
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece* matchp = &match;
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nsubmatch == 0)
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    matchp = NULL;
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int ncap = 1 + NumberOfCapturingGroups();
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ncap > nsubmatch)
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ncap = nsubmatch;
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the regexp is anchored explicitly, must not be in middle of text.
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_->anchor_start() && startpos != 0)
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the regexp is anchored explicitly, update re_anchor
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so that we can potentially fall into a faster case below.
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_->anchor_start() && prog_->anchor_end())
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re_anchor = ANCHOR_BOTH;
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re_anchor = ANCHOR_START;
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check for the required prefix, if any.
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int prefixlen = 0;
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!prefix_.empty()) {
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (startpos != 0)
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prefixlen = prefix_.size();
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (prefixlen > subtext.size())
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (prefix_foldcase_) {
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    subtext.remove_prefix(prefixlen);
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If there is a required prefix, the anchor must be at least ANCHOR_START.
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (re_anchor != ANCHOR_BOTH)
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      re_anchor = ANCHOR_START;
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog::Anchor anchor = Prog::kUnanchored;
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog::MatchKind kind = Prog::kFirstMatch;
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (options_.longest_match())
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kind = Prog::kLongestMatch;
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool skipped_test = false;
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // SearchBitState allocates a bit vector of size prog_->size() * text.size().
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // It also allocates a stack of 3-word structures which could potentially
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // grow as large as prog_->size() * text.size() but in practice is much
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // smaller.
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Conditions for using SearchBitState:
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int MaxBitStateProg = 500;   // prog_->size() <= Max.
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int MaxBitStateVector = 256*1024;  // bit vector size <= Max (bits)
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool can_bit_state = prog_->size() <= MaxBitStateProg;
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int bit_state_text_max = MaxBitStateVector / prog_->size();
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool dfa_failed = false;
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (re_anchor) {
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case UNANCHORED: {
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog_->SearchDFA(subtext, text, anchor, kind,
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            matchp, &dfa_failed, NULL)) {
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (dfa_failed) {
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Fall back to NFA below.
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          skipped_test = true;
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (FLAGS_trace_re2)
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LOG(INFO) << "Match " << trunc(pattern_)
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " [" << CEscape(subtext) << "]"
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " DFA failed.";
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (FLAGS_trace_re2)
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(INFO) << "Match " << trunc(pattern_)
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " [" << CEscape(subtext) << "]"
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " used DFA - no match.";
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (FLAGS_trace_re2)
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Match " << trunc(pattern_)
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " [" << CEscape(subtext) << "]"
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " used DFA - match";
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (matchp == NULL)  // Matched.  Don't care where
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return true;
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // SearchDFA set match[0].end() but didn't know where the
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // match started.  Run the regexp backward from match[0].end()
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to find the longest possible match -- that's where it started.
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Prog* prog = ReverseProg();
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prog == NULL)
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog->SearchDFA(match, text, Prog::kAnchored,
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (dfa_failed) {
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Fall back to NFA below.
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          skipped_test = true;
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (FLAGS_trace_re2)
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LOG(INFO) << "Match " << trunc(pattern_)
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " [" << CEscape(subtext) << "]"
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " reverse DFA failed.";
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (FLAGS_trace_re2)
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(INFO) << "Match " << trunc(pattern_)
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " [" << CEscape(subtext) << "]"
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " DFA inconsistency.";
6762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (options_.log_errors())
6772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          LOG(ERROR) << "DFA inconsistency";
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (FLAGS_trace_re2)
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Match " << trunc(pattern_)
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " [" << CEscape(subtext) << "]"
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " used reverse DFA.";
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case ANCHOR_BOTH:
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case ANCHOR_START:
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re_anchor == ANCHOR_BOTH)
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        kind = Prog::kFullMatch;
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      anchor = Prog::kAnchored;
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If only a small amount of text and need submatch
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // information anyway and we're going to use OnePass or BitState
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to get it, we might as well not even bother with the DFA:
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // OnePass or BitState will be fast enough.
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // On tiny texts, OnePass outruns even the DFA, and
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // it doesn't have the shared state and occasional mutex that
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // the DFA does.
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (can_one_pass && text.size() <= 4096 &&
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          (ncap > 1 || text.size() <= 8)) {
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (FLAGS_trace_re2)
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(INFO) << "Match " << trunc(pattern_)
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " [" << CEscape(subtext) << "]"
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " skipping DFA for OnePass.";
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        skipped_test = true;
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (FLAGS_trace_re2)
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(INFO) << "Match " << trunc(pattern_)
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " [" << CEscape(subtext) << "]"
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " skipping DFA for BitState.";
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        skipped_test = true;
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog_->SearchDFA(subtext, text, anchor, kind,
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            &match, &dfa_failed, NULL)) {
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (dfa_failed) {
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (FLAGS_trace_re2)
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LOG(INFO) << "Match " << trunc(pattern_)
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " [" << CEscape(subtext) << "]"
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      << " DFA failed.";
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          skipped_test = true;
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (FLAGS_trace_re2)
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(INFO) << "Match " << trunc(pattern_)
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " [" << CEscape(subtext) << "]"
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << " used DFA - no match.";
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!skipped_test && ncap <= 1) {
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We know exactly where it matches.  That's enough.
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ncap == 1)
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      submatch[0] = match;
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringPiece subtext1;
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (skipped_test) {
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // DFA ran out of memory or was skipped:
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // need to search in entire original text.
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      subtext1 = subtext;
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // DFA found the exact match location:
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // let NFA run an anchored, full match search
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to find submatch locations.
7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      subtext1 = match;
7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      anchor = Prog::kAnchored;
7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kind = Prog::kFullMatch;
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (can_one_pass && anchor != Prog::kUnanchored) {
7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (FLAGS_trace_re2)
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Match " << trunc(pattern_)
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " [" << CEscape(subtext) << "]"
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " using OnePass.";
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
7612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (!skipped_test && options_.log_errors())
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(ERROR) << "SearchOnePass inconsistency";
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (FLAGS_trace_re2)
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Match " << trunc(pattern_)
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " [" << CEscape(subtext) << "]"
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " using BitState.";
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog_->SearchBitState(subtext1, text, anchor,
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 kind, submatch, ncap)) {
7722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (!skipped_test && options_.log_errors())
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(ERROR) << "SearchBitState inconsistency";
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (FLAGS_trace_re2)
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Match " << trunc(pattern_)
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " [" << CEscape(subtext) << "]"
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << " using NFA.";
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
7822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (!skipped_test && options_.log_errors())
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LOG(ERROR) << "SearchNFA inconsistency";
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Adjust overall match for required prefix that we stripped off.
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prefixlen > 0 && nsubmatch > 0)
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    submatch[0] = StringPiece(submatch[0].begin() - prefixlen,
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              submatch[0].size() + prefixlen);
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Zero submatches that don't exist in the regexp.
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = ncap; i < nsubmatch; i++)
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    submatch[i] = NULL;
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal matcher - like Match() but takes Args not StringPieces.
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::DoMatch(const StringPiece& text,
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  Anchor anchor,
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  int* consumed,
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  const Arg* const* args,
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  int n) const {
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ok()) {
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options_.log_errors())
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(ERROR) << "Invalid RE2: " << *error_;
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Count number of capture groups needed.
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nvec;
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0 && consumed == NULL)
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nvec = 0;
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nvec = n+1;
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece* vec;
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece stkvec[kVecSize];
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece* heapvec = NULL;
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nvec <= arraysize(stkvec)) {
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    vec = stkvec;
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    vec = new StringPiece[nvec];
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    heapvec = vec;
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!Match(text, 0, text.size(), anchor, vec, nvec)) {
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete[] heapvec;
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if(consumed != NULL)
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *consumed = vec[0].end() - text.begin();
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0 || args == NULL) {
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We are not interested in results
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete[] heapvec;
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int ncap = NumberOfCapturingGroups();
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ncap < n) {
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // RE has fewer capturing groups than number of arg pointers passed in
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(1) << "Asked for " << n << " but only have " << ncap;
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete[] heapvec;
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we got here, we must have matched the whole pattern.
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < n; i++) {
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const StringPiece& s = vec[i+1];
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!args[i]->Parse(s.data(), s.size())) {
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // TODO: Should we indicate what the error was?
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Parse error on #" << i << " " << s << " "
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      << (void*)s.data() << "/" << s.size();
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete[] heapvec;
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] heapvec;
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Append the "rewrite" string, with backslash subsitutions from "vec",
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to string "out".
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Rewrite(string *out, const StringPiece &rewrite,
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const StringPiece *vec, int veclen) const {
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const char *s = rewrite.data(), *end = s + rewrite.size();
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       s < end; s++) {
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int c = *s;
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (c == '\\') {
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s++;
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      c = (s < end) ? *s : -1;
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (isdigit(c)) {
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int n = (c - '0');
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (n >= veclen) {
8812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          if (options_.log_errors()) {
8822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LOG(ERROR) << "requested group " << n
8832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                       << " in regexp " << rewrite.data();
8842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          }
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return false;
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        StringPiece snip = vec[n];
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (snip.size() > 0)
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          out->append(snip.data(), snip.size());
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if (c == '\\') {
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        out->push_back('\\');
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
8932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (options_.log_errors())
8942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      out->push_back(c);
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Return the number of capturing subpatterns, or -1 if the
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regexp wasn't valid on construction.
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::NumberOfCapturingGroups() const {
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (suffix_regexp_ == NULL)
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    " multiple threads end up doing the same work in parallel.");
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (num_captures_ == -1)
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    num_captures_ = suffix_regexp_->NumCaptures();
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return num_captures_;
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Checks that the rewrite string is well-formed with respect to this
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression.
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int max_token = -1;
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const char *s = rewrite.data(), *end = s + rewrite.size();
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       s < end; s++) {
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int c = *s;
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (c != '\\') {
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (++s == end) {
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error = "Rewrite schema error: '\\' not allowed at end.";
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c = *s;
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (c == '\\') {
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!isdigit(c)) {
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error = "Rewrite schema error: "
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               "'\\' must be followed by a digit or '\\'.";
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int n = (c - '0');
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (max_token < n) {
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      max_token = n;
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (max_token > NumberOfCapturingGroups()) {
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SStringPrintf(error, "Rewrite schema requests %d matches, "
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  "but the regexp only has %d parenthesized subexpressions.",
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  max_token, NumberOfCapturingGroups());
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Parsers for various types *****/
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_null(const char* str, int n, void* dest) {
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We fail if somebody asked us to store into a non-NULL void* pointer
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (dest == NULL);
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_string(const char* str, int n, void* dest) {
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  reinterpret_cast<string*>(dest)->assign(str, n);
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) {
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  reinterpret_cast<StringPiece*>(dest)->set(str, n);
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_char(const char* str, int n, void* dest) {
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n != 1) return false;
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<char*>(dest)) = str[0];
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n != 1) return false;
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<unsigned char*>(dest)) = str[0];
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Largest number spec that we are willing to parse
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kMaxNumberLength = 32;
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// REQUIRES "buf" must have length at least kMaxNumberLength+1
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copies "str" into "buf" and null-terminates.
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Overwrites *np with the new length.
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* TerminateNumber(char* buf, const char* str, int* np) {
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int n = *np;
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n <= 0) return "";
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n > 0 && isspace(*str)) {
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We are less forgiving than the strtoxxx() routines and do not
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // allow leading spaces.
9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "";
10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Although buf has a fixed maximum size, we can still handle
10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // arbitrarily large integers correctly by omitting leading zeros.
10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (Numbers that are still too long will be out of range.)
10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Before deciding whether str is too long,
10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // remove leading zeros with s/000+/00/.
10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Leaving the leading two zeros in place means that
10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // we don't change 0000x123 (invalid) into 0x123 (valid).
10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Skip over leading - before replacing.
10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool neg = false;
10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n >= 1 && str[0] == '-') {
10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    neg = true;
10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    n--;
10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    str++;
10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n >= 3 && str[0] == '0' && str[1] == '0') {
10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (n >= 3 && str[2] == '0') {
10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      n--;
10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      str++;
10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (neg) {  // make room in buf for -
10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    n++;
10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    str--;
10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n > kMaxNumberLength) return "";
10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memmove(buf, str, n);
10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (neg) {
10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buf[0] = '-';
10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  buf[n] = '\0';
10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *np = n;
10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return buf;
10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_long_radix(const char* str,
10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               int n,
10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               void* dest,
10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               int radix) {
10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0) return false;
10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char buf[kMaxNumberLength+1];
10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str = TerminateNumber(buf, str, &n);
10475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* end;
10485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  errno = 0;
10495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  long r = strtol(str, &end, radix);
10505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end != str + n) return false;   // Leftover junk
10515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (errno) return false;
10525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
10535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<long*>(dest)) = r;
10545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ulong_radix(const char* str,
10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                int n,
10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                void* dest,
10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                int radix) {
10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0) return false;
10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char buf[kMaxNumberLength+1];
10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str = TerminateNumber(buf, str, &n);
10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (str[0] == '-') {
10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   // strtoul() will silently accept negative numbers and parse
10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   // them.  This module is more strict and treats them as errors.
10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   return false;
10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* end;
10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  errno = 0;
10725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned long r = strtoul(str, &end, radix);
10735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end != str + n) return false;   // Leftover junk
10745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (errno) return false;
10755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
10765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<unsigned long*>(dest)) = r;
10775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
10785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_short_radix(const char* str,
10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                int n,
10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                void* dest,
10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                int radix) {
10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  long r;
10855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
10865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((short)r != r) return false;       // Out of range
10875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
10885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<short*>(dest)) = r;
10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ushort_radix(const char* str,
10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 int n,
10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 void* dest,
10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 int radix) {
10965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned long r;
10975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
10985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((ushort)r != r) return false;                      // Out of range
10995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<unsigned short*>(dest)) = r;
11015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_int_radix(const char* str,
11055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              int n,
11065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              void* dest,
11075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              int radix) {
11085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  long r;
11095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
11105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((int)r != r) return false;         // Out of range
11115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<int*>(dest)) = r;
11135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_uint_radix(const char* str,
11175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               int n,
11185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               void* dest,
11195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               int radix) {
11205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned long r;
11215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
11225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((uint)r != r) return false;                       // Out of range
11235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<unsigned int*>(dest)) = r;
11255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_longlong_radix(const char* str,
11295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   int n,
11305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   void* dest,
11315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   int radix) {
11325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0) return false;
11335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char buf[kMaxNumberLength+1];
11345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str = TerminateNumber(buf, str, &n);
11355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* end;
11365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  errno = 0;
11375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 r = strtoll(str, &end, radix);
11385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end != str + n) return false;   // Leftover junk
11395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (errno) return false;
11405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<int64*>(dest)) = r;
11425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ulonglong_radix(const char* str,
11465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    int n,
11475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    void* dest,
11485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    int radix) {
11495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0) return false;
11505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char buf[kMaxNumberLength+1];
11515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str = TerminateNumber(buf, str, &n);
11525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (str[0] == '-') {
11535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // strtoull() will silently accept negative numbers and parse
11545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // them.  This module is more strict and treats them as errors.
11555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
11565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* end;
11585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  errno = 0;
11595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64 r = strtoull(str, &end, radix);
11605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end != str + n) return false;   // Leftover junk
11615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (errno) return false;
11625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(reinterpret_cast<uint64*>(dest)) = r;
11645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
11685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n == 0) return false;
11695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const int kMaxLength = 200;
11705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char buf[kMaxLength];
11715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (n >= kMaxLength) return false;
11725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memcpy(buf, str, n);
11735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  buf[n] = '\0';
11745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  errno = 0;
11755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char* end;
11765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  double r;
11775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (isfloat) {
11785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    r = strtof(buf, &end);
11795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
11805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    r = strtod(buf, &end);
11815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end != buf + n) return false;   // Leftover junk
11835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (errno) return false;
11845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (dest == NULL) return true;
11855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (isfloat) {
11865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *(reinterpret_cast<float*>(dest)) = r;
11875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
11885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *(reinterpret_cast<double*>(dest)) = r;
11895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
11915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_double(const char* str, int n, void* dest) {
11945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return parse_double_float(str, n, false, dest);
11955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
11965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_float(const char* str, int n, void* dest) {
11985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return parse_double_float(str, n, true, dest);
11995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEFINE_INTEGER_PARSERS(name)                                        \
12035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RE2::Arg::parse_##name(const char* str, int n, void* dest) {          \
12045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return parse_##name##_radix(str, n, dest, 10);                          \
12055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }                                                                         \
12065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) {    \
12075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return parse_##name##_radix(str, n, dest, 16);                          \
12085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }                                                                         \
12095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) {  \
12105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return parse_##name##_radix(str, n, dest, 8);                           \
12115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }                                                                         \
12125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
12135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return parse_##name##_radix(str, n, dest, 0);                           \
12145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(short);
12175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ushort);
12185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(int);
12195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(uint);
12205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(long);
12215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ulong);
12225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(longlong);
12235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ulonglong);
12245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef DEFINE_INTEGER_PARSERS
12265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
1228