15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression interface RE2. 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Originally the PCRE C++ wrapper, but adapted to use 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the new automata-based regular expression engines. 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/re2.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdio.h> 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef WIN32 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtoll _strtoi64 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtoull _strtoui64 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtof strtod 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <pthread.h> 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <errno.h> 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h" 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/flags.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h" 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h" 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_bool(trace_re2, false, "trace RE2 execution"); 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 { 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Maximum number of args we can set 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kMaxArgs = 16; 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kVecSize = 1+kMaxArgs; 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::FullMatchN> RE2::FullMatch = {}; 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::PartialMatchN> RE2::PartialMatch = {}; 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {}; 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {}; 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define kDefaultMaxMem (8<<20) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::Options::Options() 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : encoding_(EncodingUTF8), 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) posix_syntax_(false), 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_match_(false), 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) log_errors_(true), 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_mem_(kDefaultMaxMem), 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) literal_(false), 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_nl_(false), 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_capture_(false), 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case_sensitive_(true), 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) perl_classes_(false), 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_boundary_(false), 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) one_line_(false) { 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::Options::Options(RE2::CannedOptions opt) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) posix_syntax_(opt == RE2::POSIX_SYNTAX), 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_match_(opt == RE2::POSIX_SYNTAX), 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) log_errors_(opt != RE2::Quiet), 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_mem_(kDefaultMaxMem), 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) literal_(false), 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_nl_(false), 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_capture_(false), 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case_sensitive_(true), 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) perl_classes_(false), 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_boundary_(false), 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) one_line_(false) { 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static empty things for use as const references. 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To avoid global constructors, initialized on demand. 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)GLOBAL_MUTEX(empty_mutex); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const string *empty_string; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const map<string, int> *empty_named_groups; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const map<int, string> *empty_group_names; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void InitEmpty() { 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GLOBAL_MUTEX_LOCK(empty_mutex); 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (empty_string == NULL) { 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) empty_string = new string; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) empty_named_groups = new map<string, int>; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) empty_group_names = new map<int, string>; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GLOBAL_MUTEX_UNLOCK(empty_mutex); 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Converts from Regexp error code to RE2 error code. 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Maybe some day they will diverge. In any event, this 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// hides the existence of Regexp from RE2 users. 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (code) { 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpSuccess: 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::NoError; 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpInternalError: 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorInternal; 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadEscape: 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadEscape; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadCharClass: 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadCharClass; 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadCharRange: 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadCharRange; 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpMissingBracket: 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorMissingBracket; 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpMissingParen: 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorMissingParen; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpTrailingBackslash: 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorTrailingBackslash; 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpRepeatArgument: 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorRepeatArgument; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpRepeatSize: 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorRepeatSize; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpRepeatOp: 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorRepeatOp; 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadPerlOp: 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadPerlOp; 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadUTF8: 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadUTF8; 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case re2::kRegexpBadNamedCapture: 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorBadNamedCapture; 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::ErrorInternal; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static string trunc(const StringPiece& pattern) { 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (pattern.size() < 100) 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return pattern.as_string(); 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return pattern.substr(0, 100).as_string() + "..."; 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const char* pattern) { 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(pattern, DefaultOptions); 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const string& pattern) { 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(pattern, DefaultOptions); 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const StringPiece& pattern) { 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(pattern, DefaultOptions); 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::RE2(const StringPiece& pattern, const Options& options) { 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(pattern, options); 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::Options::ParseFlags() const { 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int flags = Regexp::ClassNL; 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (encoding()) { 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (log_errors()) 1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "Unknown encoding " << encoding(); 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case RE2::Options::EncodingUTF8: 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case RE2::Options::EncodingLatin1: 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::Latin1; 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!posix_syntax()) 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::LikePerl; 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (literal()) 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::Literal; 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (never_nl()) 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::NeverNL; 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (never_capture()) 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::NeverCapture; 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!case_sensitive()) 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::FoldCase; 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (perl_classes()) 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::PerlClasses; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (word_boundary()) 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::PerlB; 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (one_line()) 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flags |= Regexp::OneLine; 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return flags; 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void RE2::Init(const StringPiece& pattern, const Options& options) { 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutex_ = new Mutex; 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pattern_ = pattern.as_string(); 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options_.Copy(options); 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InitEmpty(); 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_ = empty_string; 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_code_ = NoError; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) suffix_regexp_ = NULL; 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entire_regexp_ = NULL; 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prog_ = NULL; 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rprog_ = NULL; 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) named_groups_ = NULL; 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) group_names_ = NULL; 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) num_captures_ = -1; 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RegexpStatus status; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entire_regexp_ = Regexp::Parse( 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pattern_, 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static_cast<Regexp::ParseFlags>(options_.ParseFlags()), 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &status); 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (entire_regexp_ == NULL) { 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (error_ == empty_string) 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_ = new string(status.Text()); 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.log_errors()) { 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << status.Text(); 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_arg_ = status.error_arg().as_string(); 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_code_ = RegexpErrorToRE2(status.code()); 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefix_.clear(); 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefix_foldcase_ = false; 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Regexp* suffix; 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) suffix_regexp_ = suffix; 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) suffix_regexp_ = entire_regexp_->Incref(); 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Two thirds of the memory goes to the forward Prog, 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // one third to the reverse prog, because the forward 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Prog has two DFAs but the reverse prog has one. 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3); 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_ == NULL) { 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.log_errors()) 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_ = new string("pattern too large - compile failed"); 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_code_ = RE2::ErrorPatternTooLarge; 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Could delay this until the first match call that 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // cares about submatch information, but the one-pass 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // machine's memory gets cut from the DFA memory budget, 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and that is harder to do if the DFA has already 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // been built. 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) is_one_pass_ = prog_->IsOnePass(); 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns rprog_, computing it if needed. 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)re2::Prog* RE2::ReverseProg() const { 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MutexLock l(mutex_); 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (rprog_ == NULL && error_ == empty_string) { 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3); 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (rprog_ == NULL) { 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.log_errors()) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'"; 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_ = new string("pattern too large - reverse compile failed"); 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) error_code_ = RE2::ErrorPatternTooLarge; 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return NULL; 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rprog_; 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::~RE2() { 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (suffix_regexp_) 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) suffix_regexp_->Decref(); 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (entire_regexp_) 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) entire_regexp_->Decref(); 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete mutex_; 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete prog_; 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete rprog_; 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (error_ != empty_string) 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete error_; 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (named_groups_ != NULL && named_groups_ != empty_named_groups) 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete named_groups_; 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (group_names_ != NULL && group_names_ != empty_group_names) 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete group_names_; 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::ProgramSize() const { 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_ == NULL) 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return prog_->size(); 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns named_groups_, computing it if needed. 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const map<string, int>& RE2::NamedCapturingGroups() const { 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MutexLock l(mutex_); 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ok()) 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *empty_named_groups; 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (named_groups_ == NULL) { 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) named_groups_ = suffix_regexp_->NamedCaptures(); 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (named_groups_ == NULL) 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) named_groups_ = empty_named_groups; 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *named_groups_; 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns group_names_, computing it if needed. 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const map<int, string>& RE2::CapturingGroupNames() const { 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MutexLock l(mutex_); 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ok()) 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *empty_group_names; 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (group_names_ == NULL) { 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) group_names_ = suffix_regexp_->CaptureNames(); 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (group_names_ == NULL) 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) group_names_ = empty_group_names; 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *group_names_; 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Convenience interfaces *****/ 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::FullMatchN(const StringPiece& text, const RE2& re, 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int n) { 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n); 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int n) { 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return re.DoMatch(text, UNANCHORED, NULL, args, n); 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::ConsumeN(StringPiece* input, const RE2& re, 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int n) { 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int consumed; 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->remove_prefix(consumed); 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int n) { 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int consumed; 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->remove_prefix(consumed); 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns the maximum submatch needed for the rewrite to be done by Replace(). 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// E.g. if rewrite == "foo \\2,\\1", returns 2. 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::MaxSubmatch(const StringPiece& rewrite) { 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int max = 0; 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char *s = rewrite.data(), *end = s + rewrite.size(); 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) s < end; s++) { 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (*s == '\\') { 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) s++; 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = (s < end) ? *s : -1; 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isdigit(c)) { 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = (c - '0'); 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n > max) 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max = n; 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return max; 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Replace(string *str, 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& re, 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& rewrite) { 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece vec[kVecSize]; 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvec = 1 + MaxSubmatch(rewrite); 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nvec > arraysize(vec)) 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string s; 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!re.Rewrite(&s, rewrite, vec, nvec)) 3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert(vec[0].begin() >= str->data()); 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert(vec[0].end() <= str->data()+str->size()); 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str->replace(vec[0].data() - str->data(), vec[0].size(), s); 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::GlobalReplace(string *str, 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& re, 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& rewrite) { 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece vec[kVecSize]; 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvec = 1 + MaxSubmatch(rewrite); 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nvec > arraysize(vec)) 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* p = str->data(); 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* ep = p + str->size(); 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* lastend = NULL; 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string out; 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int count = 0; 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (p <= ep) { 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec)) 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < vec[0].begin()) 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out.append(p, vec[0].begin() - p); 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (vec[0].begin() == lastend && vec[0].size() == 0) { 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Disallow empty match at end of last match: skip ahead. 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < ep) 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out.append(p, 1); 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p++; 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re.Rewrite(&out, rewrite, vec, nvec); 4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = vec[0].end(); 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lastend = p; 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) count++; 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (count == 0) 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < ep) 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out.append(p, ep - p); 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) swap(out, *str); 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return count; 4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Extract(const StringPiece &text, 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& re, 4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece &rewrite, 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string *out) { 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece vec[kVecSize]; 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvec = 1 + MaxSubmatch(rewrite); 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nvec > arraysize(vec)) 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out->clear(); 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return re.Rewrite(out, rewrite, vec, nvec); 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string RE2::QuoteMeta(const StringPiece& unquoted) { 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string result; 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) result.reserve(unquoted.size() << 1); 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Escape any ascii character not in [A-Za-z_0-9]. 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that it's legal to escape a character even if it has no 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // special meaning in a regular expression -- so this function does 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that. (This also makes it identical to the perl function of the 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // same name except for the null-character special case; 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // see `perldoc -f quotemeta`.) 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int ii = 0; ii < unquoted.length(); ++ii) { 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that using 'isalnum' here raises the benchmark time from 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 32ns to 58ns: 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (unquoted[ii] < '0' || unquoted[ii] > '9') && 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unquoted[ii] != '_' && 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If this is the part of a UTF8 or Latin1 character, we need 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to copy this byte without escaping. Experimentally this is 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // what works correctly with the regexp library. 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !(unquoted[ii] & 128)) { 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (unquoted[ii] == '\0') { // Special handling for null chars. 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that this special handling is not strictly required for RE2, 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but this quoting is required for other regexp libraries such as 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // PCRE. 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Can't use "\\0" since the next character might be a digit. 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) result += "\\x00"; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) result += '\\'; 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) result += unquoted[ii]; 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return result; 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_ == NULL) 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = prefix_.size(); 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n > maxlen) 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) n = maxlen; 4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Determine initial min max from prefix_ literal. 4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string pmin, pmax; 4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmin = prefix_.substr(0, n); 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmax = prefix_.substr(0, n); 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prefix_foldcase_) { 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // prefix is ASCII lowercase; change pmin to uppercase. 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < n; i++) { 4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ('a' <= pmin[i] && pmin[i] <= 'z') 4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmin[i] += 'A' - 'a'; 4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add to prefix min max using PossibleMatchRange on regexp. 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string dmin, dmax; 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxlen -= n; 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { 5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmin += dmin; 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmax += dmax; 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (pmax.size() > 0) { 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // prog_->PossibleMatchRange has failed us, 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but we still have useful information from prefix_. 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Round up pmax to allow any possible suffix. 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pmax = PrefixSuccessor(pmax); 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Nothing useful. 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *min = ""; 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *max = ""; 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *min = pmin; 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *max = pmax; 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Avoid possible locale nonsense in standard strcasecmp. 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The string a is known to be all lowercase. 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int ascii_strcasecmp(const char* a, const char* b, int len) { 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *ae = a + len; 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (; a < ae; a++, b++) { 5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint8 x = *a; 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint8 y = *b; 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ('A' <= y && y <= 'Z') 5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) y += 'a' - 'A'; 5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (x != y) 5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return x - y; 5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Actual matching and rewriting code *****/ 5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Match(const StringPiece& text, 5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int startpos, 5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int endpos, 5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor re_anchor, 5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, 5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nsubmatch) const { 5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ok() || suffix_regexp_ == NULL) { 5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.log_errors()) 5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "Invalid RE2: " << *error_; 5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (startpos < 0 || startpos > endpos || endpos > text.size()) { 5562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (options_.log_errors()) 5572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "RE2: invalid startpos, endpos pair."; 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece subtext = text; 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subtext.remove_prefix(startpos); 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subtext.remove_suffix(text.size() - endpos); 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Use DFAs to find exact location of match, filter out non-matches. 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Don't ask for the location if we won't use it. 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SearchDFA can do extra optimizations in that case. 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece match; 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* matchp = &match; 5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nsubmatch == 0) 5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matchp = NULL; 5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ncap = 1 + NumberOfCapturingGroups(); 5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ncap > nsubmatch) 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncap = nsubmatch; 5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the regexp is anchored explicitly, must not be in middle of text. 5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_start() && startpos != 0) 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the regexp is anchored explicitly, update re_anchor 5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // so that we can potentially fall into a faster case below. 5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_start() && prog_->anchor_end()) 5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re_anchor = ANCHOR_BOTH; 5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) 5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re_anchor = ANCHOR_START; 5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check for the required prefix, if any. 5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int prefixlen = 0; 5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prefix_.empty()) { 5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (startpos != 0) 5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefixlen = prefix_.size(); 5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prefixlen > subtext.size()) 5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prefix_foldcase_) { 5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) 5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) 6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subtext.remove_prefix(prefixlen); 6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If there is a required prefix, the anchor must be at least ANCHOR_START. 6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (re_anchor != ANCHOR_BOTH) 6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re_anchor = ANCHOR_START; 6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Anchor anchor = Prog::kUnanchored; 6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::MatchKind kind = Prog::kFirstMatch; 6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.longest_match()) 6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kind = Prog::kLongestMatch; 6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool skipped_test = false; 6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture); 6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SearchBitState allocates a bit vector of size prog_->size() * text.size(). 6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // It also allocates a stack of 3-word structures which could potentially 6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // grow as large as prog_->size() * text.size() but in practice is much 6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // smaller. 6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Conditions for using SearchBitState: 6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int MaxBitStateProg = 500; // prog_->size() <= Max. 6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits) 6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool can_bit_state = prog_->size() <= MaxBitStateProg; 6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bit_state_text_max = MaxBitStateVector / prog_->size(); 6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool dfa_failed = false; 6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (re_anchor) { 6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case UNANCHORED: { 6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog_->SearchDFA(subtext, text, anchor, kind, 6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matchp, &dfa_failed, NULL)) { 6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dfa_failed) { 6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fall back to NFA below. 6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) skipped_test = true; 6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " DFA failed."; 6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " used DFA - no match."; 6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " used DFA - match"; 6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matchp == NULL) // Matched. Don't care where 6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SearchDFA set match[0].end() but didn't know where the 6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match started. Run the regexp backward from match[0].end() 6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to find the longest possible match -- that's where it started. 6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* prog = ReverseProg(); 6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog == NULL) 6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog->SearchDFA(match, text, Prog::kAnchored, 6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::kLongestMatch, &match, &dfa_failed, NULL)) { 6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dfa_failed) { 6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fall back to NFA below. 6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) skipped_test = true; 6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " reverse DFA failed."; 6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " DFA inconsistency."; 6762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (options_.log_errors()) 6772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "DFA inconsistency"; 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " used reverse DFA."; 6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case ANCHOR_BOTH: 6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case ANCHOR_START: 6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (re_anchor == ANCHOR_BOTH) 6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kind = Prog::kFullMatch; 6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchor = Prog::kAnchored; 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If only a small amount of text and need submatch 6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // information anyway and we're going to use OnePass or BitState 6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to get it, we might as well not even bother with the DFA: 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // OnePass or BitState will be fast enough. 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // On tiny texts, OnePass outruns even the DFA, and 6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // it doesn't have the shared state and occasional mutex that 6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the DFA does. 7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (can_one_pass && text.size() <= 4096 && 7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ncap > 1 || text.size() <= 8)) { 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " skipping DFA for OnePass."; 7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) skipped_test = true; 7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) { 7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " skipping DFA for BitState."; 7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) skipped_test = true; 7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog_->SearchDFA(subtext, text, anchor, kind, 7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &match, &dfa_failed, NULL)) { 7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dfa_failed) { 7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " DFA failed."; 7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) skipped_test = true; 7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " used DFA - no match."; 7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!skipped_test && ncap <= 1) { 7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We know exactly where it matches. That's enough. 7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ncap == 1) 7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch[0] = match; 7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece subtext1; 7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (skipped_test) { 7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // DFA ran out of memory or was skipped: 7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // need to search in entire original text. 7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subtext1 = subtext; 7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // DFA found the exact match location: 7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // let NFA run an anchored, full match search 7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to find submatch locations. 7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subtext1 = match; 7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchor = Prog::kAnchored; 7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kind = Prog::kFullMatch; 7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (can_one_pass && anchor != Prog::kUnanchored) { 7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " using OnePass."; 7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { 7612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!skipped_test && options_.log_errors()) 7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "SearchOnePass inconsistency"; 7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (can_bit_state && subtext1.size() <= bit_state_text_max) { 7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " using BitState."; 7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog_->SearchBitState(subtext1, text, anchor, 7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kind, submatch, ncap)) { 7722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!skipped_test && options_.log_errors()) 7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "SearchBitState inconsistency"; 7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (FLAGS_trace_re2) 7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "Match " << trunc(pattern_) 7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " [" << CEscape(subtext) << "]" 7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << " using NFA."; 7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { 7822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!skipped_test && options_.log_errors()) 7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "SearchNFA inconsistency"; 7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Adjust overall match for required prefix that we stripped off. 7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prefixlen > 0 && nsubmatch > 0) 7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch[0] = StringPiece(submatch[0].begin() - prefixlen, 7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch[0].size() + prefixlen); 7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Zero submatches that don't exist in the regexp. 7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = ncap; i < nsubmatch; i++) 7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch[i] = NULL; 7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal matcher - like Match() but takes Args not StringPieces. 8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::DoMatch(const StringPiece& text, 8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, 8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int* consumed, 8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const* args, 8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n) const { 8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ok()) { 8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_.log_errors()) 8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(ERROR) << "Invalid RE2: " << *error_; 8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Count number of capture groups needed. 8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvec; 8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0 && consumed == NULL) 8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvec = 0; 8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvec = n+1; 8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* vec; 8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece stkvec[kVecSize]; 8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* heapvec = NULL; 8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nvec <= arraysize(stkvec)) { 8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vec = stkvec; 8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vec = new StringPiece[nvec]; 8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) heapvec = vec; 8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!Match(text, 0, text.size(), anchor, vec, nvec)) { 8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] heapvec; 8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if(consumed != NULL) 8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *consumed = vec[0].end() - text.begin(); 8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0 || args == NULL) { 8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We are not interested in results 8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] heapvec; 8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ncap = NumberOfCapturingGroups(); 8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ncap < n) { 8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // RE has fewer capturing groups than number of arg pointers passed in 8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) VLOG(1) << "Asked for " << n << " but only have " << ncap; 8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] heapvec; 8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we got here, we must have matched the whole pattern. 8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < n; i++) { 8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& s = vec[i+1]; 8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!args[i]->Parse(s.data(), s.size())) { 8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO: Should we indicate what the error was? 8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) VLOG(1) << "Parse error on #" << i << " " << s << " " 8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << (void*)s.data() << "/" << s.size(); 8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] heapvec; 8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] heapvec; 8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Append the "rewrite" string, with backslash subsitutions from "vec", 8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to string "out". 8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Rewrite(string *out, const StringPiece &rewrite, 8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece *vec, int veclen) const { 8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char *s = rewrite.data(), *end = s + rewrite.size(); 8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) s < end; s++) { 8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = *s; 8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c == '\\') { 8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) s++; 8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = (s < end) ? *s : -1; 8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isdigit(c)) { 8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = (c - '0'); 8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n >= veclen) { 8812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (options_.log_errors()) { 8822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "requested group " << n 8832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) << " in regexp " << rewrite.data(); 8842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece snip = vec[n]; 8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (snip.size() > 0) 8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out->append(snip.data(), snip.size()); 8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (c == '\\') { 8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out->push_back('\\'); 8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 8932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (options_.log_errors()) 8942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data(); 8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out->push_back(c); 8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Return the number of capturing subpatterns, or -1 if the 9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regexp wasn't valid on construction. 9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int RE2::NumberOfCapturingGroups() const { 9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (suffix_regexp_ == NULL) 9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case" 9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) " multiple threads end up doing the same work in parallel."); 9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (num_captures_ == -1) 9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) num_captures_ = suffix_regexp_->NumCaptures(); 9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return num_captures_; 9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Checks that the rewrite string is well-formed with respect to this 9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression. 9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { 9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int max_token = -1; 9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char *s = rewrite.data(), *end = s + rewrite.size(); 9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) s < end; s++) { 9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = *s; 9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c != '\\') { 9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (++s == end) { 9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error = "Rewrite schema error: '\\' not allowed at end."; 9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = *s; 9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c == '\\') { 9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!isdigit(c)) { 9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error = "Rewrite schema error: " 9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "'\\' must be followed by a digit or '\\'."; 9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = (c - '0'); 9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (max_token < n) { 9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_token = n; 9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (max_token > NumberOfCapturingGroups()) { 9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SStringPrintf(error, "Rewrite schema requests %d matches, " 9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "but the regexp only has %d parenthesized subexpressions.", 9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_token, NumberOfCapturingGroups()); 9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Parsers for various types *****/ 9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_null(const char* str, int n, void* dest) { 9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We fail if somebody asked us to store into a non-NULL void* pointer 9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (dest == NULL); 9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_string(const char* str, int n, void* dest) { 9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) reinterpret_cast<string*>(dest)->assign(str, n); 9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) { 9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) reinterpret_cast<StringPiece*>(dest)->set(str, n); 9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_char(const char* str, int n, void* dest) { 9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n != 1) return false; 9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<char*>(dest)) = str[0]; 9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { 9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n != 1) return false; 9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<unsigned char*>(dest)) = str[0]; 9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Largest number spec that we are willing to parse 9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kMaxNumberLength = 32; 9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// REQUIRES "buf" must have length at least kMaxNumberLength+1 9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copies "str" into "buf" and null-terminates. 9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Overwrites *np with the new length. 9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* TerminateNumber(char* buf, const char* str, int* np) { 9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = *np; 9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n <= 0) return ""; 9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n > 0 && isspace(*str)) { 9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We are less forgiving than the strtoxxx() routines and do not 9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // allow leading spaces. 9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ""; 10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Although buf has a fixed maximum size, we can still handle 10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // arbitrarily large integers correctly by omitting leading zeros. 10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (Numbers that are still too long will be out of range.) 10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Before deciding whether str is too long, 10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // remove leading zeros with s/000+/00/. 10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Leaving the leading two zeros in place means that 10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we don't change 0000x123 (invalid) into 0x123 (valid). 10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Skip over leading - before replacing. 10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool neg = false; 10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n >= 1 && str[0] == '-') { 10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) neg = true; 10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) n--; 10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str++; 10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n >= 3 && str[0] == '0' && str[1] == '0') { 10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (n >= 3 && str[2] == '0') { 10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) n--; 10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str++; 10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (neg) { // make room in buf for - 10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) n++; 10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str--; 10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n > kMaxNumberLength) return ""; 10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memmove(buf, str, n); 10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (neg) { 10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[0] = '-'; 10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[n] = '\0'; 10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *np = n; 10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return buf; 10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_long_radix(const char* str, 10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0) return false; 10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buf[kMaxNumberLength+1]; 10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str = TerminateNumber(buf, str, &n); 10475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char* end; 10485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) errno = 0; 10495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long r = strtol(str, &end, radix); 10505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (end != str + n) return false; // Leftover junk 10515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (errno) return false; 10525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 10535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<long*>(dest)) = r; 10545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ulong_radix(const char* str, 10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0) return false; 10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buf[kMaxNumberLength+1]; 10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str = TerminateNumber(buf, str, &n); 10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (str[0] == '-') { 10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // strtoul() will silently accept negative numbers and parse 10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // them. This module is more strict and treats them as errors. 10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char* end; 10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) errno = 0; 10725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned long r = strtoul(str, &end, radix); 10735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (end != str + n) return false; // Leftover junk 10745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (errno) return false; 10755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 10765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<unsigned long*>(dest)) = r; 10775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 10785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_short_radix(const char* str, 10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long r; 10855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 10865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((short)r != r) return false; // Out of range 10875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 10885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<short*>(dest)) = r; 10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ushort_radix(const char* str, 10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 10965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned long r; 10975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 10985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ushort)r != r) return false; // Out of range 10995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<unsigned short*>(dest)) = r; 11015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_int_radix(const char* str, 11055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 11065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 11075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 11085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long r; 11095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 11105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((int)r != r) return false; // Out of range 11115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<int*>(dest)) = r; 11135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_uint_radix(const char* str, 11175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 11185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 11195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 11205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned long r; 11215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 11225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((uint)r != r) return false; // Out of range 11235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<unsigned int*>(dest)) = r; 11255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_longlong_radix(const char* str, 11295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 11305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 11315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 11325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0) return false; 11335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buf[kMaxNumberLength+1]; 11345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str = TerminateNumber(buf, str, &n); 11355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char* end; 11365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) errno = 0; 11375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int64 r = strtoll(str, &end, radix); 11385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (end != str + n) return false; // Leftover junk 11395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (errno) return false; 11405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<int64*>(dest)) = r; 11425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_ulonglong_radix(const char* str, 11465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n, 11475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* dest, 11485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int radix) { 11495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0) return false; 11505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buf[kMaxNumberLength+1]; 11515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str = TerminateNumber(buf, str, &n); 11525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (str[0] == '-') { 11535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // strtoull() will silently accept negative numbers and parse 11545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // them. This module is more strict and treats them as errors. 11555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 11565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 11575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char* end; 11585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) errno = 0; 11595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint64 r = strtoull(str, &end, radix); 11605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (end != str + n) return false; // Leftover junk 11615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (errno) return false; 11625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<uint64*>(dest)) = r; 11645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { 11685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == 0) return false; 11695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int kMaxLength = 200; 11705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buf[kMaxLength]; 11715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n >= kMaxLength) return false; 11725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(buf, str, n); 11735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[n] = '\0'; 11745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) errno = 0; 11755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char* end; 11765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) double r; 11775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isfloat) { 11785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) r = strtof(buf, &end); 11795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 11805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) r = strtod(buf, &end); 11815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 11825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (end != buf + n) return false; // Leftover junk 11835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (errno) return false; 11845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 11855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isfloat) { 11865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<float*>(dest)) = r; 11875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 11885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *(reinterpret_cast<double*>(dest)) = r; 11895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 11905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 11915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_double(const char* str, int n, void* dest) { 11945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_double_float(str, n, false, dest); 11955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 11965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool RE2::Arg::parse_float(const char* str, int n, void* dest) { 11985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_double_float(str, n, true, dest); 11995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 12005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEFINE_INTEGER_PARSERS(name) \ 12035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \ 12045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_##name##_radix(str, n, dest, 10); \ 12055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } \ 12065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ 12075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_##name##_radix(str, n, dest, 16); \ 12085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } \ 12095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ 12105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_##name##_radix(str, n, dest, 8); \ 12115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } \ 12125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ 12135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return parse_##name##_radix(str, n, dest, 0); \ 12145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(short); 12175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ushort); 12185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(int); 12195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(uint); 12205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(long); 12215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ulong); 12225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(longlong); 12235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_INTEGER_PARSERS(ulonglong); 12245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef DEFINE_INTEGER_PARSERS 12265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 1228