12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2003-2009 Google Inc. All rights reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// This is a variant of PCRE's pcrecpp.cc, originally written at Google. 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The main changes are the addition of the HitLimit method and 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// compilation as PCRE in namespace re2. 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <errno.h> 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/flags.h" 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/pcre.h" 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define PCREPORT(level) LOG(level) 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Default PCRE limits. 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Defaults chosen to allow a plausible amount of CPU and 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// not exceed main thread stacks. Note that other threads 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// often have smaller stacks, and therefore tightening 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// regexp_stack_limit may frequently be necessary. 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)"); 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_int32(regexp_match_limit, 1000000, 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson "default PCRE match limit (function calls)"); 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Maximum number of args we can set 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kMaxArgs = 16; 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Approximate size of a recursive invocation of PCRE's 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// internal "match()" frame. This varies depending on the 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// compiler and architecture, of course, so the constant is 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// just a conservative estimate. To find the exact number, 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// run regexp_unittest with --regexp_stack_limit=0 under 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// a debugger and look at the frames when it crashes. 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The exact frame size was 656 in production on 2008/02/03. 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kPCREFrameSize = 700; 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Special name for missing C++ arguments. 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::Arg PCRE::no_more_args((void*)NULL); 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst PCRE::PartialMatchFunctor PCRE::PartialMatch = { }; 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst PCRE::FullMatchFunctor PCRE::FullMatch = { } ; 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst PCRE::ConsumeFunctor PCRE::Consume = { }; 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonconst PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { }; 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// If a regular expression has no error, its error_ field points here 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const string empty_string; 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonvoid PCRE::Init(const char* pattern, Option options, int match_limit, 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int stack_limit, bool report_errors) { 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pattern_ = pattern; 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options_ = options; 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson match_limit_ = match_limit; 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson stack_limit_ = stack_limit; 572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson hit_limit_ = false; 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_ = &empty_string; 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson report_errors_ = report_errors; 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_full_ = NULL; 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_partial_ = NULL; 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options & ~(EnabledCompileOptions | EnabledExecOptions)) { 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson error_ = new string("illegal regexp option"); 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) 662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << "Error compiling '" << pattern << "': illegal regexp option"; 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_partial_ = Compile(UNANCHORED); 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_partial_ != NULL) { 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_full_ = Compile(ANCHOR_BOTH); 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const char* pattern) { 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, None, 0, 0, true); 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const char* pattern, Option option) { 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, option, 0, 0, true); 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const string& pattern) { 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern.c_str(), None, 0, 0, true); 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const string& pattern, Option option) { 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern.c_str(), option, 0, 0, true); 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const string& pattern, const PCRE_Options& re_option) { 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern.c_str(), re_option.option(), re_option.match_limit(), 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_option.stack_limit(), re_option.report_errors()); 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::PCRE(const char *pattern, const PCRE_Options& re_option) { 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Init(pattern, re_option.option(), re_option.match_limit(), 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_option.stack_limit(), re_option.report_errors()); 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPCRE::~PCRE() { 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_full_ != NULL) pcre_free(re_full_); 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_partial_ != NULL) pcre_free(re_partial_); 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (error_ != &empty_string) delete error_; 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonpcre* PCRE::Compile(Anchor anchor) { 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Special treatment for anchoring. This is needed because at 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // runtime pcre only provides an option for anchoring at the 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // beginning of a string. 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // There are three types of anchoring we want: 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // UNANCHORED Compile the original pattern, and use 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // a pcre unanchored match. 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ANCHOR_START Compile the original pattern, and use 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // a pcre anchored match. 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ANCHOR_BOTH Tack a "\z" to the end of the original pattern 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // and use a pcre anchored match. 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char* error; 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int eoffset; 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pcre* re; 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (anchor != ANCHOR_BOTH) { 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re = pcre_compile(pattern_.c_str(), 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (options_ & EnabledCompileOptions), 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &error, &eoffset, NULL); 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Tack a '\z' at the end of PCRE. Parenthesize it first so that 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // the '\z' applies to all top-level alternatives in the regexp. 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string wrapped = "(?:"; // A non-counting grouping operator 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson wrapped += pattern_; 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson wrapped += ")\\z"; 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re = pcre_compile(wrapped.c_str(), 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (options_ & EnabledCompileOptions), 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &error, &eoffset, NULL); 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re == NULL) { 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (error_ == &empty_string) error_ = new string(error); 1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error; 1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re; 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Convenience interfaces *****/ 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::FullMatchFunctor::operator ()(const StringPiece& text, 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& re, 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a0, 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a1, 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a2, 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a3, 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a4, 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a5, 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a6, 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a7, 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a8, 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a9, 1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a10, 1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a11, 1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a12, 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a13, 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a14, 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a15) const { 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* args[kMaxArgs]; 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = 0; 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a0 == &no_more_args) goto done; args[n++] = &a0; 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a1 == &no_more_args) goto done; args[n++] = &a1; 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a2 == &no_more_args) goto done; args[n++] = &a2; 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a3 == &no_more_args) goto done; args[n++] = &a3; 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a4 == &no_more_args) goto done; args[n++] = &a4; 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a5 == &no_more_args) goto done; args[n++] = &a5; 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a6 == &no_more_args) goto done; args[n++] = &a6; 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a7 == &no_more_args) goto done; args[n++] = &a7; 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a8 == &no_more_args) goto done; args[n++] = &a8; 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a9 == &no_more_args) goto done; args[n++] = &a9; 1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a10 == &no_more_args) goto done; args[n++] = &a10; 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a11 == &no_more_args) goto done; args[n++] = &a11; 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a12 == &no_more_args) goto done; args[n++] = &a12; 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a13 == &no_more_args) goto done; args[n++] = &a13; 1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a14 == &no_more_args) goto done; args[n++] = &a14; 1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a15 == &no_more_args) goto done; args[n++] = &a15; 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsondone: 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); 1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text, 1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& re, 1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a0, 1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a1, 1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a2, 1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a3, 1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a4, 1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a5, 1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a6, 1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a7, 1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a8, 1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a9, 1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a10, 1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a11, 1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a12, 2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a13, 2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a14, 2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a15) const { 2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* args[kMaxArgs]; 2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = 0; 2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a0 == &no_more_args) goto done; args[n++] = &a0; 2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a1 == &no_more_args) goto done; args[n++] = &a1; 2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a2 == &no_more_args) goto done; args[n++] = &a2; 2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a3 == &no_more_args) goto done; args[n++] = &a3; 2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a4 == &no_more_args) goto done; args[n++] = &a4; 2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a5 == &no_more_args) goto done; args[n++] = &a5; 2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a6 == &no_more_args) goto done; args[n++] = &a6; 2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a7 == &no_more_args) goto done; args[n++] = &a7; 2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a8 == &no_more_args) goto done; args[n++] = &a8; 2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a9 == &no_more_args) goto done; args[n++] = &a9; 2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a10 == &no_more_args) goto done; args[n++] = &a10; 2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a11 == &no_more_args) goto done; args[n++] = &a11; 2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a12 == &no_more_args) goto done; args[n++] = &a12; 2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a13 == &no_more_args) goto done; args[n++] = &a13; 2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a14 == &no_more_args) goto done; args[n++] = &a14; 2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a15 == &no_more_args) goto done; args[n++] = &a15; 2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsondone: 2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); 2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::ConsumeFunctor::operator ()(StringPiece* input, 2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& pattern, 2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a0, 2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a1, 2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a2, 2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a3, 2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a4, 2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a5, 2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a6, 2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a7, 2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a8, 2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a9, 2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a10, 2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a11, 2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a12, 2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a13, 2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a14, 2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a15) const { 2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* args[kMaxArgs]; 2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = 0; 2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a0 == &no_more_args) goto done; args[n++] = &a0; 2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a1 == &no_more_args) goto done; args[n++] = &a1; 2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a2 == &no_more_args) goto done; args[n++] = &a2; 2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a3 == &no_more_args) goto done; args[n++] = &a3; 2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a4 == &no_more_args) goto done; args[n++] = &a4; 2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a5 == &no_more_args) goto done; args[n++] = &a5; 2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a6 == &no_more_args) goto done; args[n++] = &a6; 2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a7 == &no_more_args) goto done; args[n++] = &a7; 2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a8 == &no_more_args) goto done; args[n++] = &a8; 2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a9 == &no_more_args) goto done; args[n++] = &a9; 2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a10 == &no_more_args) goto done; args[n++] = &a10; 2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a11 == &no_more_args) goto done; args[n++] = &a11; 2602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a12 == &no_more_args) goto done; args[n++] = &a12; 2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a13 == &no_more_args) goto done; args[n++] = &a13; 2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a14 == &no_more_args) goto done; args[n++] = &a14; 2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a15 == &no_more_args) goto done; args[n++] = &a15; 2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsondone: 2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed, 2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson args, n, vec, kVecSize)) { 2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson input->remove_prefix(consumed); 2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 2742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input, 2782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& pattern, 2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a0, 2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a1, 2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a2, 2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a3, 2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a4, 2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a5, 2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a6, 2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a7, 2872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a8, 2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a9, 2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a10, 2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a11, 2912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a12, 2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a13, 2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a14, 2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg& a15) const { 2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* args[kMaxArgs]; 2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = 0; 2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a0 == &no_more_args) goto done; args[n++] = &a0; 2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a1 == &no_more_args) goto done; args[n++] = &a1; 2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a2 == &no_more_args) goto done; args[n++] = &a2; 3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a3 == &no_more_args) goto done; args[n++] = &a3; 3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a4 == &no_more_args) goto done; args[n++] = &a4; 3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a5 == &no_more_args) goto done; args[n++] = &a5; 3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a6 == &no_more_args) goto done; args[n++] = &a6; 3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a7 == &no_more_args) goto done; args[n++] = &a7; 3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a8 == &no_more_args) goto done; args[n++] = &a8; 3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a9 == &no_more_args) goto done; args[n++] = &a9; 3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a10 == &no_more_args) goto done; args[n++] = &a10; 3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a11 == &no_more_args) goto done; args[n++] = &a11; 3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a12 == &no_more_args) goto done; args[n++] = &a12; 3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a13 == &no_more_args) goto done; args[n++] = &a13; 3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a14 == &no_more_args) goto done; args[n++] = &a14; 3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (&a15 == &no_more_args) goto done; args[n++] = &a15; 3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsondone: 3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int consumed; 3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed, 3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson args, n, vec, kVecSize)) { 3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson input->remove_prefix(consumed); 3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Replace(string *str, 3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& pattern, 3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece& rewrite) { 3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); 3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matches == 0) 3322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string s; 3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!pattern.Rewrite(&s, rewrite, *str, vec, matches)) 3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(vec[0] >= 0); 3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(vec[1] >= 0); 3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str->replace(vec[0], vec[1] - vec[0], s); 3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 3422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint PCRE::GlobalReplace(string *str, 3452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& pattern, 3462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece& rewrite) { 3472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int count = 0; 3482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 3492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string out; 3502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int start = 0; 3512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool last_match_was_empty_string = false; 3522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (; start <= str->length();) { 3542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If the previous match was for the empty string, we shouldn't 3552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // just match again: we'll match in the same way and get an 3562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // infinite loop. Instead, we do the match in a special way: 3572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // anchored -- to force another try at the same position -- 3582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // and with a flag saying that this time, ignore empty matches. 3592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If this special match returns, that means there's a non-empty 3602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // match at this position as well, and we can continue. If not, 3612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // we do what perl does, and just advance by one. 3622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Notice that perl prints '@@@' for this; 3632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // perl -le '$_ = "aa"; s/b*|aa/@/g; print' 3642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int matches; 3652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (last_match_was_empty_string) { 3662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson matches = pattern.TryMatch(*str, start, ANCHOR_START, false, 3672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson vec, kVecSize); 3682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matches <= 0) { 3692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (start < str->length()) 3702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.push_back((*str)[start]); 3712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson start++; 3722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson last_match_was_empty_string = false; 3732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 3742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 3762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson matches = pattern.TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); 3772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matches <= 0) 3782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 3792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int matchstart = vec[0], matchend = vec[1]; 3812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(matchstart >= start); 3822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(matchend >= matchstart); 3832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.append(*str, start, matchstart - start); 3852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pattern.Rewrite(&out, rewrite, *str, vec, matches); 3862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson start = matchend; 3872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson count++; 3882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson last_match_was_empty_string = (matchstart == matchend); 3892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (count == 0) 3922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 3932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (start < str->length()) 3952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out.append(*str, start, str->length() - start); 3962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson swap(out, *str); 3972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return count; 3982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Extract(const StringPiece &text, 4012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const PCRE& pattern, 4022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece &rewrite, 4032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string *out) { 4042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vec[kVecSize]; 4052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); 4062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matches == 0) 4072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 4082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->clear(); 4092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return pattern.Rewrite(out, rewrite, text, vec, matches); 4102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring PCRE::QuoteMeta(const StringPiece& unquoted) { 4132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string result; 4142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result.reserve(unquoted.size() << 1); 4152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Escape any ascii character not in [A-Za-z_0-9]. 4172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // 4182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Note that it's legal to escape a character even if it has no 4192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // special meaning in a regular expression -- so this function does 4202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // that. (This also makes it identical to the perl function of the 4212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // same name except for the null-character special case; 4222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // see `perldoc -f quotemeta`.) 4232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int ii = 0; ii < unquoted.length(); ++ii) { 4242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Note that using 'isalnum' here raises the benchmark time from 4252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // 32ns to 58ns: 4262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && 4272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && 4282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (unquoted[ii] < '0' || unquoted[ii] > '9') && 4292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unquoted[ii] != '_' && 4302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If this is the part of a UTF8 or Latin1 character, we need 4312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to copy this byte without escaping. Experimentally this is 4322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // what works correctly with the regexp library. 4332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson !(unquoted[ii] & 128)) { 4342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (unquoted[ii] == '\0') { // Special handling for null chars. 4352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Can't use "\\0" since the next character might be a digit. 4362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += "\\x00"; 4372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 4382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += '\\'; 4402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson result += unquoted[ii]; 4422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return result; 4452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Actual matching and rewriting code *****/ 4482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::HitLimit() { 4502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return hit_limit_; 4512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonvoid PCRE::ClearHitLimit() { 4542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson hit_limit_ = 0; 4552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint PCRE::TryMatch(const StringPiece& text, 4582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int startpos, 4592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Anchor anchor, 4602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool empty_ok, 4612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int *vec, 4622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vecsize) const { 4632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; 4642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re == NULL) { 4652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) << "Matching against invalid re: " << *error_; 4662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 4672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int match_limit = match_limit_; 4702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (match_limit <= 0) { 4712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson match_limit = FLAGS_regexp_match_limit; 4722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int stack_limit = stack_limit_; 4752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (stack_limit <= 0) { 4762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson stack_limit = FLAGS_regexp_stack_limit; 4772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pcre_extra extra = { 0 }; 4802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (match_limit > 0) { 4812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson extra.flags |= PCRE_EXTRA_MATCH_LIMIT; 4822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson extra.match_limit = match_limit; 4832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (stack_limit > 0) { 4852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 4862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson extra.match_limit_recursion = stack_limit / kPCREFrameSize; 4872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int options = 0; 4902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (anchor != UNANCHORED) 4912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options |= PCRE_ANCHORED; 4922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!empty_ok) 4932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options |= PCRE_NOTEMPTY; 4942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int rc = pcre_exec(re, // The regular expression object 4962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &extra, 4972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (text.data() == NULL) ? "" : text.data(), 4982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson text.size(), 4992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson startpos, 5002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options, 5012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson vec, 5022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson vecsize); 5032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Handle errors 5052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (rc == 0) { 5062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // pcre_exec() returns 0 as a special case when the number of 5072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // capturing subpatterns exceeds the size of the vector. 5082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // When this happens, there is a match and the output vector 5092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // is filled, but we miss out on the positions of the extra subpatterns. 5102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson rc = vecsize / 2; 5112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (rc < 0) { 5122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (rc) { 5132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case PCRE_ERROR_NOMATCH: 5142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 5152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case PCRE_ERROR_MATCHLIMIT: 5162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Writing to hit_limit is not safe if multiple threads 5172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // are using the PCRE, but the flag is only intended 5182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // for use by unit tests anyway, so we let it go. 5192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson hit_limit_ = true; 5202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(WARNING) << "Exceeded match limit of " << match_limit 5212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " when matching '" << pattern_ << "'" 5222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " against text that is " << text.size() << " bytes."; 5232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 5242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case PCRE_ERROR_RECURSIONLIMIT: 5252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // See comment about hit_limit above. 5262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson hit_limit_ = true; 5272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit 5282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " when matching '" << pattern_ << "'" 5292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " against text that is " << text.size() << " bytes."; 5302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 5312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 5322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // There are other return codes from pcre.h : 5332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_NULL (-2) 5342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_BADOPTION (-3) 5352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_BADMAGIC (-4) 5362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_UNKNOWN_NODE (-5) 5372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_NOMEMORY (-6) 5382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE_ERROR_NOSUBSTRING (-7) 5392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ... 5402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) << "Unexpected return code: " << rc 5412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " when matching '" << pattern_ << "'" 5422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << ", re=" << re 5432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << ", text=" << text 5442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << ", vec=" << vec 5452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << ", vecsize=" << vecsize; 5462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 5472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return rc; 5512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 5522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::DoMatchImpl(const StringPiece& text, 5542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Anchor anchor, 5552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* consumed, 5562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const* args, 5572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 5582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* vec, 5592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int vecsize) const { 5602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert((1 + n) * 3 <= vecsize); // results + PCRE workspace 5612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int matches = TryMatch(text, 0, anchor, true, vec, vecsize); 5622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(matches >= 0); // TryMatch never returns negatives 5632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (matches == 0) 5642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *consumed = vec[1]; 5672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0 || args == NULL) { 5692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We are not interested in results 5702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 5712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (NumberOfCapturingGroups() < n) { 5732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // PCRE has fewer capturing groups than number of arg pointers passed in 5742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If we got here, we must have matched the whole pattern. 5782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We do not need (can not do) any more checks on the value of 'matches' here 5792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // -- see the comment for TryMatch. 5802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < n; i++) { 5812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const int start = vec[2*(i+1)]; 5822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const int limit = vec[2*(i+1)+1]; 5832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!args[i]->Parse(text.data() + start, limit-start)) { 5842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // TODO: Should we indicate what the error was? 5852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 5862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 5902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 5912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::DoMatch(const StringPiece& text, 5932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Anchor anchor, 5942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* consumed, 5952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const Arg* const args[], 5962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n) const { 5972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson assert(n >= 0); 5982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson size_t const vecsize = (1 + n) * 3; // results + PCRE workspace 5992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // (as for kVecSize) 6002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int *vec = new int[vecsize]; 6012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize); 6022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] vec; 6032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return b; 6042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Rewrite(string *out, const StringPiece &rewrite, 6072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const StringPiece &text, int *vec, int veclen) const { 6082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int number_of_capturing_groups = NumberOfCapturingGroups(); 6092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (const char *s = rewrite.data(), *end = s + rewrite.size(); 6102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s < end; s++) { 6112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = *s; 6122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c == '\\') { 6132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *++s; 6142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isdigit(c)) { 6152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = (c - '0'); 6162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= veclen) { 6172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n <= number_of_capturing_groups) { 6182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // unmatched optional capturing group. treat 6192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // its value as empty string; i.e., nothing to append. 6202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 6212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) << "requested group " << n 6222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << " in regexp " << rewrite.data(); 6232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int start = vec[2 * n]; 6272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (start >= 0) 6282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->append(text.data() + start, vec[2 * n + 1] - start); 6292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (c == '\\') { 6302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->push_back('\\'); 6312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 6322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data(); 6332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 6362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson out->push_back(c); 6372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 6402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const { 6432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int max_token = -1; 6442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (const char *s = rewrite.data(), *end = s + rewrite.size(); 6452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s < end; s++) { 6462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = *s; 6472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c != '\\') { 6482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 6492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (++s == end) { 6512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *error = "Rewrite schema error: '\\' not allowed at end."; 6522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *s; 6552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c == '\\') { 6562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 6572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!isdigit(c)) { 6592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *error = "Rewrite schema error: " 6602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson "'\\' must be followed by a digit or '\\'."; 6612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = (c - '0'); 6642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (max_token < n) { 6652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_token = n; 6662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (max_token > NumberOfCapturingGroups()) { 6702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SStringPrintf(error, "Rewrite schema requests %d matches, " 6712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson "but the regexp only has %d parenthesized subexpressions.", 6722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_token, NumberOfCapturingGroups()); 6732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 6742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 6762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Return the number of capturing subpatterns, or -1 if the 6802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// regexp wasn't valid on construction. 6812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint PCRE::NumberOfCapturingGroups() const { 6822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re_partial_ == NULL) return -1; 6832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int result; 6852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CHECK(pcre_fullinfo(re_partial_, // The regular expression object 6862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson NULL, // We did not study the pattern 6872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PCRE_INFO_CAPTURECOUNT, 6882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson &result) == 0); 6892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return result; 6902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/***** Parsers for various types *****/ 6942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_null(const char* str, int n, void* dest) { 6962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We fail if somebody asked us to store into a non-NULL void* pointer 6972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return (dest == NULL); 6982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_string(const char* str, int n, void* dest) { 7012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson reinterpret_cast<string*>(dest)->assign(str, n); 7032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_stringpiece(const char* str, int n, void* dest) { 7072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson reinterpret_cast<StringPiece*>(dest)->set(str, n); 7092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_char(const char* str, int n, void* dest) { 7132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n != 1) return false; 7142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<char*>(dest)) = str[0]; 7162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_uchar(const char* str, int n, void* dest) { 7202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n != 1) return false; 7212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned char*>(dest)) = str[0]; 7232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Largest number spec that we are willing to parse 7272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int kMaxNumberLength = 32; 7282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1 7302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// PCREQUIPCRES "n > 0" 7312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copies "str" into "buf" and null-terminates if necessary. 7322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Returns one of: 7332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// a. "str" if no termination is needed 7342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// b. "buf" if the string was copied and null-terminated 7352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// c. "" if the input was invalid and has no hope of being parsed 7362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const char* TerminateNumber(char* buf, const char* str, int n) { 7372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((n > 0) && isspace(*str)) { 7382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We are less forgiving than the strtoxxx() routines and do not 7392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // allow leading spaces. 7402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ""; 7412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // See if the character right after the input text may potentially 7442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // look like a digit. 7452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (isdigit(str[n]) || 7462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ((str[n] >= 'a') && (str[n] <= 'f')) || 7472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ((str[n] >= 'A') && (str[n] <= 'F'))) { 7482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > kMaxNumberLength) return ""; // Input too big to be a valid number 7492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memcpy(buf, str, n); 7502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson buf[n] = '\0'; 7512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return buf; 7522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 7532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // We can parse right out of the supplied string, so return it. 7542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return str; 7552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_long_radix(const char* str, 7592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 7602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 7612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 7622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 7632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 7642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, n); 7652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 7662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 7672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r = strtol(str, &end, radix); 7682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 7692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 7702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<long*>(dest)) = r; 7722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_ulong_radix(const char* str, 7762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 7772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 7782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 7792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 7802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 7812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, n); 7822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (str[0] == '-') { 7832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // strtoul() will silently accept negative numbers and parse 7842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // them. This module is more strict and treats them as errors. 7852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 7862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 7872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 7892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 7902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r = strtoul(str, &end, radix); 7912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 7922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 7932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 7942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned long*>(dest)) = r; 7952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 7962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 7972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 7982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_short_radix(const char* str, 7992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r; 8032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 8042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((short)r != r) return false; // Out of range 8052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<short*>(dest)) = r; 8072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_ushort_radix(const char* str, 8112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r; 8152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 8162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((ushort)r != r) return false; // Out of range 8172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned short*>(dest)) = r; 8192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_int_radix(const char* str, 8232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long r; 8272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 8282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((int)r != r) return false; // Out of range 8292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<int*>(dest)) = r; 8312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_uint_radix(const char* str, 8352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long r; 8392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 8402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((uint)r != r) return false; // Out of range 8412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<unsigned int*>(dest)) = r; 8432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_longlong_radix(const char* str, 8472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 8512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 8522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, n); 8532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 8542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 8552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int64 r = strtoll(str, &end, radix); 8562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 8572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 8582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<int64*>(dest)) = r; 8602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_ulonglong_radix(const char* str, 8642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n, 8652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void* dest, 8662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int radix) { 8672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 8682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxNumberLength+1]; 8692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str = TerminateNumber(buf, str, n); 8702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (str[0] == '-') { 8712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // strtoull() will silently accept negative numbers and parse 8722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // them. This module is more strict and treats them as errors. 8732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 8742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 8752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 8762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 8772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson uint64 r = strtoull(str, &end, radix); 8782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != str + n) return false; // Leftover junk 8792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 8802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 8812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<uint64*>(dest)) = r; 8822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 8832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 8842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 8852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_double(const char* str, int n, void* dest) { 8862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n == 0) return false; 8872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static const int kMaxLength = 200; 8882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[kMaxLength]; 8892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n >= kMaxLength) return false; 8902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memcpy(buf, str, n); 8912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson buf[n] = '\0'; 8922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson errno = 0; 8932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char* end; 8942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson double r = strtod(buf, &end); 8952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (end != buf + n) { 8962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#ifdef COMPILER_MSVC 8972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Microsoft's strtod() doesn't handle inf and nan, so we have to 8982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // handle it explicitly. Speed is not important here because this 8992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // code is only called in unit tests. 9002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool pos = true; 9012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const char* i = buf; 9022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ('-' == *i) { 9032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson pos = false; 9042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ++i; 9052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if ('+' == *i) { 9062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ++i; 9072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (0 == stricmp(i, "inf") || 0 == stricmp(i, "infinity")) { 9092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r = numeric_limits<double>::infinity(); 9102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!pos) 9112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r = -r; 9122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (0 == stricmp(i, "nan")) { 9132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r = numeric_limits<double>::quiet_NaN(); 9142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 9152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 9162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#else 9182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; // Leftover junk 9192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#endif 9202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (errno) return false; 9222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<double*>(dest)) = r; 9242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool PCRE::Arg::parse_float(const char* str, int n, void* dest) { 9282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson double r; 9292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!parse_double(str, n, &r)) return false; 9302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dest == NULL) return true; 9312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); 9322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 9332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 9342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define DEFINE_INTEGER_PARSERS(name) \ 9372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) { \ 9382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 10); \ 9392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 9402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ 9412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 16); \ 9422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 9432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ 9442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 8); \ 9452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } \ 9462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ 9472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return parse_##name##_radix(str, n, dest, 0); \ 9482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 9492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(short); 9512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ushort); 9522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(int); 9532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(uint); 9542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(long); 9552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ulong); 9562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(longlong); 9572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonDEFINE_INTEGER_PARSERS(ulonglong); 9582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#undef DEFINE_INTEGER_PARSERS 9602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 9612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 962