12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Regular expression interface RE2. 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Originally the PCRE C++ wrapper, but adapted to use 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// the new automata-based regular expression engines. 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/re2.h" 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <stdio.h> 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <string> 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <pthread.h> 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <errno.h> 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/flags.h" 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/prog.h" 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/regexp.h" 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_bool(trace_re2, false, "trace RE2 execution"); 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Maximum number of args we can set 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kMaxArgs = 16; 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kVecSize = 1+kMaxArgs; 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch; 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch; 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume; 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume; 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// This will trigger LNK2005 error in MSVC. 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#ifndef COMPILER_MSVC 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst int RE2::Options::kDefaultMaxMem; // initialized in re2.h 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#endif // COMPILER_MSVC 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinRE2::Options::Options(RE2::CannedOptions opt) 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin posix_syntax_(opt == RE2::POSIX), 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin longest_match_(opt == RE2::POSIX), 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin log_errors_(opt != RE2::Quiet), 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin max_mem_(kDefaultMaxMem), 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin literal_(false), 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin never_nl_(false), 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin never_capture_(false), 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case_sensitive_(true), 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin perl_classes_(false), 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin word_boundary_(false), 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin one_line_(false) { 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// static empty things for use as const references. 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// To avoid global constructors, initialized on demand. 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinGLOBAL_MUTEX(empty_mutex); 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic const string *empty_string; 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic const map<string, int> *empty_named_groups; 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic const map<int, string> *empty_group_names; 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void InitEmpty() { 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GLOBAL_MUTEX_LOCK(empty_mutex); 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (empty_string == NULL) { 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin empty_string = new string; 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin empty_named_groups = new map<string, int>; 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin empty_group_names = new map<int, string>; 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GLOBAL_MUTEX_UNLOCK(empty_mutex); 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Converts from Regexp error code to RE2 error code. 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Maybe some day they will diverge. In any event, this 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// hides the existence of Regexp from RE2 users. 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (code) { 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpSuccess: 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::NoError; 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpInternalError: 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorInternal; 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadEscape: 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadEscape; 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadCharClass: 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadCharClass; 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadCharRange: 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadCharRange; 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpMissingBracket: 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorMissingBracket; 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpMissingParen: 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorMissingParen; 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpTrailingBackslash: 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorTrailingBackslash; 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpRepeatArgument: 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorRepeatArgument; 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpRepeatSize: 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorRepeatSize; 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpRepeatOp: 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorRepeatOp; 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadPerlOp: 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadPerlOp; 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadUTF8: 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadUTF8; 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case re2::kRegexpBadNamedCapture: 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorBadNamedCapture; 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return RE2::ErrorInternal; 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic string trunc(const StringPiece& pattern) { 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (pattern.size() < 100) 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return pattern.as_string(); 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return pattern.substr(0, 100).as_string() + "..."; 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::RE2(const char* pattern) { 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, DefaultOptions); 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::RE2(const string& pattern) { 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, DefaultOptions); 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::RE2(const StringPiece& pattern) { 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, DefaultOptions); 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::RE2(const StringPiece& pattern, const Options& options) { 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, options); 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::Options::ParseFlags() const { 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int flags = Regexp::ClassNL; 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (encoding()) { 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (log_errors()) 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "Unknown encoding " << encoding(); 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case RE2::Options::EncodingUTF8: 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case RE2::Options::EncodingLatin1: 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::Latin1; 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!posix_syntax()) 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::LikePerl; 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (literal()) 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::Literal; 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (never_nl()) 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::NeverNL; 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (never_capture()) 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin flags |= Regexp::NeverCapture; 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!case_sensitive()) 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::FoldCase; 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (perl_classes()) 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::PerlClasses; 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (word_boundary()) 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::PerlB; 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (one_line()) 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson flags |= Regexp::OneLine; 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return flags; 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonvoid RE2::Init(const StringPiece& pattern, const Options& options) { 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson mutex_ = new Mutex; 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pattern_ = pattern.as_string(); 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options_.Copy(options); 1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin InitEmpty(); 1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin error_ = empty_string; 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_code_ = NoError; 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson suffix_regexp_ = NULL; 1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson entire_regexp_ = NULL; 1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prog_ = NULL; 1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson rprog_ = NULL; 1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson named_groups_ = NULL; 1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson group_names_ = NULL; 1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson num_captures_ = -1; 1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson RegexpStatus status; 1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson entire_regexp_ = Regexp::Parse( 1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pattern_, 1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static_cast<Regexp::ParseFlags>(options_.ParseFlags()), 1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &status); 1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (entire_regexp_ == NULL) { 1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (error_ == empty_string) 1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_ = new string(status.Text()); 1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) { 1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " 1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << status.Text(); 1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_arg_ = status.error_arg().as_string(); 2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_code_ = RegexpErrorToRE2(status.code()); 2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prefix_.clear(); 2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prefix_foldcase_ = false; 2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* suffix; 2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) 2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson suffix_regexp_ = suffix; 2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson suffix_regexp_ = entire_regexp_->Incref(); 2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Two thirds of the memory goes to the forward Prog, 2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // one third to the reverse prog, because the forward 2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Prog has two DFAs but the reverse prog has one. 2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3); 2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog_ == NULL) { 2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; 2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_ = new string("pattern too large - compile failed"); 2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_code_ = RE2::ErrorPatternTooLarge; 2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Could delay this until the first match call that 2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // cares about submatch information, but the one-pass 2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // machine's memory gets cut from the DFA memory budget, 2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // and that is harder to do if the DFA has already 2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // been built. 2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson is_one_pass_ = prog_->IsOnePass(); 2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Returns rprog_, computing it if needed. 2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonre2::Prog* RE2::ReverseProg() const { 2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson MutexLock l(mutex_); 2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (rprog_ == NULL && error_ == empty_string) { 2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3); 2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (rprog_ == NULL) { 2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'"; 2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_ = new string("pattern too large - reverse compile failed"); 2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_code_ = RE2::ErrorPatternTooLarge; 2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return rprog_; 2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::~RE2() { 2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (suffix_regexp_) 2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson suffix_regexp_->Decref(); 2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (entire_regexp_) 2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson entire_regexp_->Decref(); 2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete mutex_; 2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete prog_; 2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete rprog_; 2560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (error_ != empty_string) 2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete error_; 2580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (named_groups_ != NULL && named_groups_ != empty_named_groups) 2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete named_groups_; 2600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (group_names_ != NULL && group_names_ != empty_group_names) 2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete group_names_; 2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::ProgramSize() const { 2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog_ == NULL) 2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return -1; 2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return prog_->size(); 2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Returns named_groups_, computing it if needed. 2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst map<string, int>& RE2::NamedCapturingGroups() const { 2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson MutexLock l(mutex_); 2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ok()) 2740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return *empty_named_groups; 2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (named_groups_ == NULL) { 2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson named_groups_ = suffix_regexp_->NamedCaptures(); 2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (named_groups_ == NULL) 2780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin named_groups_ = empty_named_groups; 2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return *named_groups_; 2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Returns group_names_, computing it if needed. 2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst map<int, string>& RE2::CapturingGroupNames() const { 2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson MutexLock l(mutex_); 2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ok()) 2870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return *empty_group_names; 2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (group_names_ == NULL) { 2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson group_names_ = suffix_regexp_->CaptureNames(); 2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (group_names_ == NULL) 2910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin group_names_ = empty_group_names; 2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return *group_names_; 2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Convenience interfaces *****/ 2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::FullMatchN(const StringPiece& text, const RE2& re, 2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const args[], int n) { 3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n); 3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::PartialMatchN(const StringPiece& text, const RE2& re, 3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const args[], int n) { 3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re.DoMatch(text, UNANCHORED, NULL, args, n); 3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::ConsumeN(StringPiece* input, const RE2& re, 3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const args[], int n) { 3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { 3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson input->remove_prefix(consumed); 3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, 3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const args[], int n) { 3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { 3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson input->remove_prefix(consumed); 3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Returns the maximum submatch needed for the rewrite to be done by Replace(). 3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// E.g. if rewrite == "foo \\2,\\1", returns 2. 3320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinint RE2::MaxSubmatch(const StringPiece& rewrite) { 3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int max = 0; 3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (const char *s = rewrite.data(), *end = s + rewrite.size(); 3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s < end; s++) { 3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (*s == '\\') { 3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s++; 3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = (s < end) ? *s : -1; 3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isdigit(c)) { 3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = (c - '0'); 3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > max) 3422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max = n; 3432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return max; 3472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Replace(string *str, 3502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const RE2& re, 3512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece& rewrite) { 3522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece vec[kVecSize]; 3532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nvec = 1 + MaxSubmatch(rewrite); 3542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (nvec > arraysize(vec)) 3552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) 3572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string s; 3602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!re.Rewrite(&s, rewrite, vec, nvec)) 3612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(vec[0].begin() >= str->data()); 3642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(vec[0].end() <= str->data()+str->size()); 3652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str->replace(vec[0].data() - str->data(), vec[0].size(), s); 3662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 3672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::GlobalReplace(string *str, 3702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const RE2& re, 3712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece& rewrite) { 3722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece vec[kVecSize]; 3732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nvec = 1 + MaxSubmatch(rewrite); 3742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (nvec > arraysize(vec)) 3752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char* p = str->data(); 3782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char* ep = p + str->size(); 3792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char* lastend = NULL; 3802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string out; 3812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int count = 0; 3822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson while (p <= ep) { 3832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec)) 3842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 3852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (p < vec[0].begin()) 3862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.append(p, vec[0].begin() - p); 3872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (vec[0].begin() == lastend && vec[0].size() == 0) { 3882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Disallow empty match at end of last match: skip ahead. 3892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (p < ep) 3902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.append(p, 1); 3912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson p++; 3922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 3932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re.Rewrite(&out, rewrite, vec, nvec); 3952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson p = vec[0].end(); 3962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson lastend = p; 3972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson count++; 3982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (count == 0) 4012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 4022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (p < ep) 4042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.append(p, ep - p); 4052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson swap(out, *str); 4062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return count; 4072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Extract(const StringPiece &text, 4102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const RE2& re, 4112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece &rewrite, 4122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string *out) { 4132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece vec[kVecSize]; 4142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nvec = 1 + MaxSubmatch(rewrite); 4152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (nvec > arraysize(vec)) 4162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 4172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) 4192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 4202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->clear(); 4222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re.Rewrite(out, rewrite, vec, nvec); 4232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring RE2::QuoteMeta(const StringPiece& unquoted) { 4262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string result; 4272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result.reserve(unquoted.size() << 1); 4282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Escape any ascii character not in [A-Za-z_0-9]. 4302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // 4312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Note that it's legal to escape a character even if it has no 4322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // special meaning in a regular expression -- so this function does 4332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // that. (This also makes it identical to the perl function of the 4342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // same name except for the null-character special case; 4352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // see `perldoc -f quotemeta`.) 4362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int ii = 0; ii < unquoted.length(); ++ii) { 4372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Note that using 'isalnum' here raises the benchmark time from 4382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // 32ns to 58ns: 4392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && 4402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && 4412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (unquoted[ii] < '0' || unquoted[ii] > '9') && 4422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unquoted[ii] != '_' && 4432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If this is the part of a UTF8 or Latin1 character, we need 4442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to copy this byte without escaping. Experimentally this is 4452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // what works correctly with the regexp library. 4462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson !(unquoted[ii] & 128)) { 4472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (unquoted[ii] == '\0') { // Special handling for null chars. 4482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Note that this special handling is not strictly required for RE2, 4492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // but this quoting is required for other regexp libraries such as 4502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE. 4512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Can't use "\\0" since the next character might be a digit. 4522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += "\\x00"; 4532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 4542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += '\\'; 4562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += unquoted[ii]; 4582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return result; 4612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { 4642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog_ == NULL) 4652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 4662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = prefix_.size(); 4682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > maxlen) 4692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n = maxlen; 4702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Determine initial min max from prefix_ literal. 4722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string pmin, pmax; 4732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmin = prefix_.substr(0, n); 4742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmax = prefix_.substr(0, n); 4752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prefix_foldcase_) { 4762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // prefix is ASCII lowercase; change pmin to uppercase. 4772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < n; i++) { 4782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ('a' <= pmin[i] && pmin[i] <= 'z') 4792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmin[i] += 'A' - 'a'; 4802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Add to prefix min max using PossibleMatchRange on regexp. 4842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string dmin, dmax; 4852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson maxlen -= n; 4862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { 4872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmin += dmin; 4882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmax += dmax; 4892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (pmax.size() > 0) { 4902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // prog_->PossibleMatchRange has failed us, 4912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // but we still have useful information from prefix_. 4922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Round up pmax to allow any possible suffix. 4932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pmax = PrefixSuccessor(pmax); 4942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 4952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Nothing useful. 4962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *min = ""; 4972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *max = ""; 4982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 4992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *min = pmin; 5022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *max = pmax; 5032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 5042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 5052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Avoid possible locale nonsense in standard strcasecmp. 5072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The string a is known to be all lowercase. 5082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic int ascii_strcasecmp(const char* a, const char* b, int len) { 5092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char *ae = a + len; 5102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (; a < ae; a++, b++) { 5122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson uint8 x = *a; 5132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson uint8 y = *b; 5142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ('A' <= y && y <= 'Z') 5152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson y += 'a' - 'A'; 5162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (x != y) 5172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return x - y; 5182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 5202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 5212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Actual matching and rewriting code *****/ 5242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Match(const StringPiece& text, 5262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int startpos, 5272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int endpos, 5282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Anchor re_anchor, 5292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece* submatch, 5302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nsubmatch) const { 5312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ok() || suffix_regexp_ == NULL) { 5322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 5332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Invalid RE2: " << *error_; 5342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (startpos < 0 || startpos > endpos || endpos > text.size()) { 5380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (options_.log_errors()) 5390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "RE2: invalid startpos, endpos pair."; 5402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 5432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece subtext = text; 5442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subtext.remove_prefix(startpos); 5452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subtext.remove_suffix(text.size() - endpos); 5462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Use DFAs to find exact location of match, filter out non-matches. 5482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Don't ask for the location if we won't use it. 5502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // SearchDFA can do extra optimizations in that case. 5512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece match; 5522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece* matchp = &match; 5532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (nsubmatch == 0) 5542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson matchp = NULL; 5552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int ncap = 1 + NumberOfCapturingGroups(); 5572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (ncap > nsubmatch) 5582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ncap = nsubmatch; 5592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If the regexp is anchored explicitly, must not be in middle of text. 5612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog_->anchor_start() && startpos != 0) 5622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If the regexp is anchored explicitly, update re_anchor 5652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // so that we can potentially fall into a faster case below. 5662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog_->anchor_start() && prog_->anchor_end()) 5672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_anchor = ANCHOR_BOTH; 5682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) 5692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_anchor = ANCHOR_START; 5702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Check for the required prefix, if any. 5722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int prefixlen = 0; 5732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prefix_.empty()) { 5742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (startpos != 0) 5752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prefixlen = prefix_.size(); 5772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prefixlen > subtext.size()) 5782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prefix_foldcase_) { 5802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) 5812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 5832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) 5842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subtext.remove_prefix(prefixlen); 5872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If there is a required prefix, the anchor must be at least ANCHOR_START. 5882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_anchor != ANCHOR_BOTH) 5892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_anchor = ANCHOR_START; 5902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prog::Anchor anchor = Prog::kUnanchored; 5932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prog::MatchKind kind = Prog::kFirstMatch; 5942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.longest_match()) 5952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson kind = Prog::kLongestMatch; 5962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool skipped_test = false; 5972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture); 5992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // SearchBitState allocates a bit vector of size prog_->size() * text.size(). 6012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // It also allocates a stack of 3-word structures which could potentially 6022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // grow as large as prog_->size() * text.size() but in practice is much 6032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // smaller. 6042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Conditions for using SearchBitState: 6052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const int MaxBitStateProg = 500; // prog_->size() <= Max. 6062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits) 6072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool can_bit_state = prog_->size() <= MaxBitStateProg; 6082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int bit_state_text_max = MaxBitStateVector / prog_->size(); 6092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool dfa_failed = false; 6112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (re_anchor) { 6122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 6132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case UNANCHORED: { 6142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog_->SearchDFA(subtext, text, anchor, kind, 6152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson matchp, &dfa_failed, NULL)) { 6162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dfa_failed) { 6172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Fall back to NFA below. 6182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson skipped_test = true; 6192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " DFA failed."; 6232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 6242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " used DFA - no match."; 6292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " used DFA - match"; 6352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matchp == NULL) // Matched. Don't care where 6362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 6372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // SearchDFA set match[0].end() but didn't know where the 6382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // match started. Run the regexp backward from match[0].end() 6392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to find the longest possible match -- that's where it started. 6402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prog* prog = ReverseProg(); 6412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prog == NULL) 6422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog->SearchDFA(match, text, Prog::kAnchored, 6442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prog::kLongestMatch, &match, &dfa_failed, NULL)) { 6452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dfa_failed) { 6462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Fall back to NFA below. 6472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson skipped_test = true; 6482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " reverse DFA failed."; 6522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 6532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " DFA inconsistency."; 6580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (options_.log_errors()) 6590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "DFA inconsistency"; 6602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " used reverse DFA."; 6662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 6672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case ANCHOR_BOTH: 6702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case ANCHOR_START: 6712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_anchor == ANCHOR_BOTH) 6722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson kind = Prog::kFullMatch; 6732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson anchor = Prog::kAnchored; 6742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If only a small amount of text and need submatch 6762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // information anyway and we're going to use OnePass or BitState 6772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to get it, we might as well not even bother with the DFA: 6782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // OnePass or BitState will be fast enough. 6792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // On tiny texts, OnePass outruns even the DFA, and 6802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // it doesn't have the shared state and occasional mutex that 6812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // the DFA does. 6822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (can_one_pass && text.size() <= 4096 && 6832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (ncap > 1 || text.size() <= 8)) { 6842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " skipping DFA for OnePass."; 6882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson skipped_test = true; 6892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 6902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) { 6922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 6932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 6942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 6952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " skipping DFA for BitState."; 6962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson skipped_test = true; 6972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 6982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog_->SearchDFA(subtext, text, anchor, kind, 7002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &match, &dfa_failed, NULL)) { 7012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dfa_failed) { 7022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 7032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 7042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 7052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " DFA failed."; 7062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson skipped_test = true; 7072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 7082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 7102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 7112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 7122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " used DFA - no match."; 7132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 7162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!skipped_test && ncap <= 1) { 7192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We know exactly where it matches. That's enough. 7202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (ncap == 1) 7212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson submatch[0] = match; 7222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 7232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece subtext1; 7242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (skipped_test) { 7252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // DFA ran out of memory or was skipped: 7262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // need to search in entire original text. 7272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subtext1 = subtext; 7282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 7292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // DFA found the exact match location: 7302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // let NFA run an anchored, full match search 7312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to find submatch locations. 7322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subtext1 = match; 7332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson anchor = Prog::kAnchored; 7342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson kind = Prog::kFullMatch; 7352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (can_one_pass && anchor != Prog::kUnanchored) { 7382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 7392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 7402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 7412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " using OnePass."; 7422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { 7430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!skipped_test && options_.log_errors()) 7442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "SearchOnePass inconsistency"; 7452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (can_bit_state && subtext1.size() <= bit_state_text_max) { 7482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 7492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 7502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 7512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " using BitState."; 7522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog_->SearchBitState(subtext1, text, anchor, 7532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson kind, submatch, ncap)) { 7540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!skipped_test && options_.log_errors()) 7552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "SearchBitState inconsistency"; 7562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 7592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (FLAGS_trace_re2) 7602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "Match " << trunc(pattern_) 7612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " [" << CEscape(subtext) << "]" 7622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " using NFA."; 7632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { 7640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!skipped_test && options_.log_errors()) 7652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "SearchNFA inconsistency"; 7662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Adjust overall match for required prefix that we stripped off. 7722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prefixlen > 0 && nsubmatch > 0) 7732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson submatch[0] = StringPiece(submatch[0].begin() - prefixlen, 7742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson submatch[0].size() + prefixlen); 7752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Zero submatches that don't exist in the regexp. 7772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = ncap; i < nsubmatch; i++) 7782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson submatch[i] = NULL; 7792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Internal matcher - like Match() but takes Args not StringPieces. 7832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::DoMatch(const StringPiece& text, 7842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Anchor anchor, 7852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* consumed, 7862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const* args, 7872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n) const { 7882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ok()) { 7892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 7902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Invalid RE2: " << *error_; 7912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Count number of capture groups needed. 7952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nvec; 7962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0 && consumed == NULL) 7972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nvec = 0; 7982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 7992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nvec = n+1; 8002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece* vec; 8022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece stkvec[kVecSize]; 8032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece* heapvec = NULL; 8042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (nvec <= arraysize(stkvec)) { 8062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson vec = stkvec; 8072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 8082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson vec = new StringPiece[nvec]; 8092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson heapvec = vec; 8102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!Match(text, 0, text.size(), anchor, vec, nvec)) { 8132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] heapvec; 8142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(consumed != NULL) 8182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *consumed = vec[0].end() - text.begin(); 8192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0 || args == NULL) { 8212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We are not interested in results 8222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] heapvec; 8232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int ncap = NumberOfCapturingGroups(); 8272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (ncap < n) { 8282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // RE has fewer capturing groups than number of arg pointers passed in 8292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(1) << "Asked for " << n << " but only have " << ncap; 8302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] heapvec; 8312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If we got here, we must have matched the whole pattern. 8352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < n; i++) { 8362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece& s = vec[i+1]; 8372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!args[i]->Parse(s.data(), s.size())) { 8382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // TODO: Should we indicate what the error was? 8392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(1) << "Parse error on #" << i << " " << s << " " 8402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << (void*)s.data() << "/" << s.size(); 8412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] heapvec; 8422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] heapvec; 8472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Append the "rewrite" string, with backslash subsitutions from "vec", 8512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// to string "out". 8522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Rewrite(string *out, const StringPiece &rewrite, 8532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece *vec, int veclen) const { 8542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (const char *s = rewrite.data(), *end = s + rewrite.size(); 8552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s < end; s++) { 8562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = *s; 8572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c == '\\') { 8582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s++; 8592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = (s < end) ? *s : -1; 8602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isdigit(c)) { 8612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = (c - '0'); 8622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= veclen) { 8630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (options_.log_errors()) { 8640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "requested group " << n 8650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << " in regexp " << rewrite.data(); 8660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 8672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringPiece snip = vec[n]; 8702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (snip.size() > 0) 8712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->append(snip.data(), snip.size()); 8722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (c == '\\') { 8732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->push_back('\\'); 8742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 8750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (options_.log_errors()) 8760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data(); 8772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 8802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->push_back(c); 8812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Return the number of capturing subpatterns, or -1 if the 8872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// regexp wasn't valid on construction. 8882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::NumberOfCapturingGroups() const { 8892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (suffix_regexp_ == NULL) 8902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return -1; 8912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case" 8922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson " multiple threads end up doing the same work in parallel."); 8932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (num_captures_ == -1) 8942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson num_captures_ = suffix_regexp_->NumCaptures(); 8952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return num_captures_; 8962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Checks that the rewrite string is well-formed with respect to this 8992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// regular expression. 9002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { 9012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int max_token = -1; 9022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (const char *s = rewrite.data(), *end = s + rewrite.size(); 9032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s < end; s++) { 9042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = *s; 9052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c != '\\') { 9062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 9072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (++s == end) { 9092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *error = "Rewrite schema error: '\\' not allowed at end."; 9102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 9112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *s; 9132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c == '\\') { 9142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 9152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!isdigit(c)) { 9172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *error = "Rewrite schema error: " 9182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson "'\\' must be followed by a digit or '\\'."; 9192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 9202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = (c - '0'); 9222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (max_token < n) { 9232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_token = n; 9242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (max_token > NumberOfCapturingGroups()) { 9282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SStringPrintf(error, "Rewrite schema requests %d matches, " 9292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson "but the regexp only has %d parenthesized subexpressions.", 9302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_token, NumberOfCapturingGroups()); 9312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 9322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Parsers for various types *****/ 9372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_null(const char* str, int n, void* dest) { 9392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We fail if somebody asked us to store into a non-NULL void* pointer 9402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return (dest == NULL); 9412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_string(const char* str, int n, void* dest) { 9442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson reinterpret_cast<string*>(dest)->assign(str, n); 9462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) { 9502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson reinterpret_cast<StringPiece*>(dest)->set(str, n); 9522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_char(const char* str, int n, void* dest) { 9562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n != 1) return false; 9572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<char*>(dest)) = str[0]; 9592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { 9632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n != 1) return false; 9642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned char*>(dest)) = str[0]; 9662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Largest number spec that we are willing to parse 9702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kMaxNumberLength = 32; 9712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// REQUIRES "buf" must have length at least kMaxNumberLength+1 9732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copies "str" into "buf" and null-terminates. 9742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Overwrites *np with the new length. 9752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const char* TerminateNumber(char* buf, const char* str, int* np) { 9762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = *np; 9772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n <= 0) return ""; 9782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > 0 && isspace(*str)) { 9792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We are less forgiving than the strtoxxx() routines and do not 9802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // allow leading spaces. 9812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ""; 9822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Although buf has a fixed maximum size, we can still handle 9852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // arbitrarily large integers correctly by omitting leading zeros. 9862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // (Numbers that are still too long will be out of range.) 9872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Before deciding whether str is too long, 9882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // remove leading zeros with s/000+/00/. 9892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Leaving the leading two zeros in place means that 9902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // we don't change 0000x123 (invalid) into 0x123 (valid). 9912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Skip over leading - before replacing. 9922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool neg = false; 9932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= 1 && str[0] == '-') { 9942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson neg = true; 9952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n--; 9962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str++; 9972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= 3 && str[0] == '0' && str[1] == '0') { 10002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson while (n >= 3 && str[2] == '0') { 10012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n--; 10022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str++; 10032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 10042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 10052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (neg) { // make room in buf for - 10072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n++; 10082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str--; 10092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 10102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > kMaxNumberLength) return ""; 10122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memmove(buf, str, n); 10142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (neg) { 10152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson buf[0] = '-'; 10162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 10172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson buf[n] = '\0'; 10182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *np = n; 10192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return buf; 10202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_long_radix(const char* str, 10232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 10242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 10252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 10262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 10272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 10282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, &n); 10292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 10302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 10312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r = strtol(str, &end, radix); 10322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 10332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 10342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 10352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<long*>(dest)) = r; 10362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 10372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_ulong_radix(const char* str, 10402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 10412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 10422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 10432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 10442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 10452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, &n); 10462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (str[0] == '-') { 10472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // strtoul() will silently accept negative numbers and parse 10482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // them. This module is more strict and treats them as errors. 10492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 10502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 10512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 10532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 10542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r = strtoul(str, &end, radix); 10552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 10562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 10572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 10582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned long*>(dest)) = r; 10592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 10602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_short_radix(const char* str, 10632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 10642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 10652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 10662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r; 10672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 10682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((short)r != r) return false; // Out of range 10692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 10702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<short*>(dest)) = r; 10712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 10722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_ushort_radix(const char* str, 10752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 10762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 10772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 10782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r; 10792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 10802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((ushort)r != r) return false; // Out of range 10812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 10822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned short*>(dest)) = r; 10832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 10842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_int_radix(const char* str, 10872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 10882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 10892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 10902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r; 10912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 10922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((int)r != r) return false; // Out of range 10932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 10942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<int*>(dest)) = r; 10952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 10962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 10972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 10982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_uint_radix(const char* str, 10992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 11002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 11012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 11022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r; 11032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 11042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((uint)r != r) return false; // Out of range 11052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 11062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned int*>(dest)) = r; 11072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 11082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_longlong_radix(const char* str, 11112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 11122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 11132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 11142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 11152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 11162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, &n); 11172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 11182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 11192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int64 r = strtoll(str, &end, radix); 11202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 11212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 11222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 11232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<int64*>(dest)) = r; 11242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 11252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_ulonglong_radix(const char* str, 11282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 11292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 11302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 11312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 11322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 11332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, &n); 11342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (str[0] == '-') { 11352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // strtoull() will silently accept negative numbers and parse 11362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // them. This module is more strict and treats them as errors. 11372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 11382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 11392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 11402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 11412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson uint64 r = strtoull(str, &end, radix); 11422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 11432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 11442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 11452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<uint64*>(dest)) = r; 11462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 11472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { 11502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 11512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static const int kMaxLength = 200; 11522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxLength]; 11532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= kMaxLength) return false; 11542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memcpy(buf, str, n); 11552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson buf[n] = '\0'; 11562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 11572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 11582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson double r; 11592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isfloat) { 11602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r = strtof(buf, &end); 11612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 11622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r = strtod(buf, &end); 11632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 11642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != buf + n) return false; // Leftover junk 11652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 11662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 11672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isfloat) { 11682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<float*>(dest)) = r; 11692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 11702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<double*>(dest)) = r; 11712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 11722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 11732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_double(const char* str, int n, void* dest) { 11762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_double_float(str, n, false, dest); 11772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Arg::parse_float(const char* str, int n, void* dest) { 11802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_double_float(str, n, true, dest); 11812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 11822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define DEFINE_INTEGER_PARSERS(name) \ 11852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \ 11862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 10); \ 11872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 11882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ 11892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 16); \ 11902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 11912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ 11922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 8); \ 11932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 11942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ 11952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 0); \ 11962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 11972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 11982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(short); 11992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ushort); 12002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(int); 12012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(uint); 12022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(long); 12032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ulong); 12042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(longlong); 12052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ulonglong); 12062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 12072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#undef DEFINE_INTEGER_PARSERS 12082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 12092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 1210