10e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Copyright 2003-2009 Google Inc. All rights reserved. 20e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Use of this source code is governed by a BSD-style 30e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// license that can be found in the LICENSE file. 40e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 50e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// This is a variant of PCRE's pcrecpp.cc, originally written at Google. 60e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// The main changes are the addition of the HitLimit method and 70e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// compilation as PCRE in namespace re2. 80e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 90e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#include <errno.h> 100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#include "util/util.h" 110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#include "util/flags.h" 120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#include "util/pcre.h" 130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#ifdef WIN32 150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#define strtoll _strtoi64 160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#define strtoull _strtoui64 170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#endif 180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org#define PCREPORT(level) LOG(level) 200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Default PCRE limits. 220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Defaults chosen to allow a plausible amount of CPU and 230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// not exceed main thread stacks. Note that other threads 240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// often have smaller stacks, and therefore tightening 250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// regexp_stack_limit may frequently be necessary. 260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgDEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)"); 270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgDEFINE_int32(regexp_match_limit, 1000000, 280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org "default PCRE match limit (function calls)"); 290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 30a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.orgnamespace re2 { 310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Maximum number of args we can set 330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgstatic const int kMaxArgs = 16; 340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgstatic const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace 350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Approximate size of a recursive invocation of PCRE's 370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// internal "match()" frame. This varies depending on the 380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// compiler and architecture, of course, so the constant is 390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// just a conservative estimate. To find the exact number, 400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// run regexp_unittest with --regexp_stack_limit=0 under 410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// a debugger and look at the frames when it crashes. 420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// The exact frame size was 656 in production on 2008/02/03. 432007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgstatic const int kPCREFrameSize = 700; 442007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Special name for missing C++ arguments. 462007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgPCRE::Arg PCRE::no_more_args((void*)NULL); 472007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 482007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgconst PCRE::PartialMatchFunctor PCRE::PartialMatch = { }; 492007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgconst PCRE::FullMatchFunctor PCRE::FullMatch = { } ; 502007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgconst PCRE::ConsumeFunctor PCRE::Consume = { }; 512007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgconst PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { }; 522007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 532007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org// If a regular expression has no error, its error_ field points here 542007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.orgstatic const string empty_string; 552007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgvoid PCRE::Init(const char* pattern, Option options, int match_limit, 570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int stack_limit, bool report_errors) { 580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org pattern_ = pattern; 590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org options_ = options; 600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org match_limit_ = match_limit; 610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org stack_limit_ = stack_limit; 620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org hit_limit_ = false; 630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org error_ = &empty_string; 640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org report_errors_ = report_errors; 650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_full_ = NULL; 660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_partial_ = NULL; 670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (options & ~(EnabledCompileOptions | EnabledExecOptions)) { 690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org error_ = new string("illegal regexp option"); 700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) 710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << "Error compiling '" << pattern << "': illegal regexp option"; 720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_partial_ = Compile(UNANCHORED); 740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (re_partial_ != NULL) { 750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_full_ = Compile(ANCHOR_BOTH); 760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const char* pattern) { 810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern, None, 0, 0, true); 820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const char* pattern, Option option) { 840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern, option, 0, 0, true); 850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const string& pattern) { 870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern.c_str(), None, 0, 0, true); 880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const string& pattern, Option option) { 900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern.c_str(), option, 0, 0, true); 91e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org} 920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const string& pattern, const PCRE_Options& re_option) { 930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern.c_str(), re_option.option(), re_option.match_limit(), 940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_option.stack_limit(), re_option.report_errors()); 950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgPCRE::PCRE(const char *pattern, const PCRE_Options& re_option) { 980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Init(pattern, re_option.option(), re_option.match_limit(), 990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re_option.stack_limit(), re_option.report_errors()); 1000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 1010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 102e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.orgPCRE::~PCRE() { 1032007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org if (re_full_ != NULL) pcre_free(re_full_); 1040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (re_partial_ != NULL) pcre_free(re_partial_); 1050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (error_ != &empty_string) delete error_; 1065aed3bb9fb287faecd773b88bb68732c31579590wu@webrtc.org} 1072007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 1085aed3bb9fb287faecd773b88bb68732c31579590wu@webrtc.orgpcre* PCRE::Compile(Anchor anchor) { 1095aed3bb9fb287faecd773b88bb68732c31579590wu@webrtc.org // Special treatment for anchoring. This is needed because at 1100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // runtime pcre only provides an option for anchoring at the 1110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // beginning of a string. 1122007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // 1132007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // There are three types of anchoring we want: 1140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // UNANCHORED Compile the original pattern, and use 1150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // a pcre unanchored match. 1160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // ANCHOR_START Compile the original pattern, and use 1170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // a pcre anchored match. 1182007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // ANCHOR_BOTH Tack a "\z" to the end of the original pattern 1192007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // and use a pcre anchored match. 1200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 1210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const char* error; 1220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int eoffset; 1232007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org pcre* re; 1242007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org if (anchor != ANCHOR_BOTH) { 1250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re = pcre_compile(pattern_.c_str(), 1260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org (options_ & EnabledCompileOptions), 1270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org &error, &eoffset, NULL); 1280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 1290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Tack a '\z' at the end of PCRE. Parenthesize it first so that 1300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // the '\z' applies to all top-level alternatives in the regexp. 1310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org string wrapped = "(?:"; // A non-counting grouping operator 1320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org wrapped += pattern_; 1330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org wrapped += ")\\z"; 1340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org re = pcre_compile(wrapped.c_str(), 1350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org (options_ & EnabledCompileOptions), 1360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org &error, &eoffset, NULL); 1370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 1380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (re == NULL) { 1390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (error_ == &empty_string) error_ = new string(error); 1400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error; 1410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 1420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return re; 1430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 1440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 1450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org/***** Convenience interfaces *****/ 1460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 1470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::FullMatchFunctor::operator ()(const StringPiece& text, 1480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& re, 1490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a0, 1500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a1, 1510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a2, 1520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a3, 153e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org const Arg& a4, 154e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org const Arg& a5, 1550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a6, 1560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a7, 1570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a8, 1580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a9, 1590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a10, 1600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a11, 1610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a12, 1620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a13, 1630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a14, 1640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a15) const { 1650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* args[kMaxArgs]; 1660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n = 0; 1670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a0 == &no_more_args) goto done; args[n++] = &a0; 1680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a1 == &no_more_args) goto done; args[n++] = &a1; 1690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a2 == &no_more_args) goto done; args[n++] = &a2; 1700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a3 == &no_more_args) goto done; args[n++] = &a3; 1710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a4 == &no_more_args) goto done; args[n++] = &a4; 1720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a5 == &no_more_args) goto done; args[n++] = &a5; 1730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a6 == &no_more_args) goto done; args[n++] = &a6; 1740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a7 == &no_more_args) goto done; args[n++] = &a7; 1750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a8 == &no_more_args) goto done; args[n++] = &a8; 1760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a9 == &no_more_args) goto done; args[n++] = &a9; 1770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a10 == &no_more_args) goto done; args[n++] = &a10; 1780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a11 == &no_more_args) goto done; args[n++] = &a11; 1790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a12 == &no_more_args) goto done; args[n++] = &a12; 1800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a13 == &no_more_args) goto done; args[n++] = &a13; 1810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a14 == &no_more_args) goto done; args[n++] = &a14; 1820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a15 == &no_more_args) goto done; args[n++] = &a15; 1830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgdone: 1840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 1850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int consumed; 1860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 1870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); 1880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 1890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 1900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text, 1910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& re, 1920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a0, 1930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a1, 1940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a2, 1950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a3, 1962007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org const Arg& a4, 1972007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org const Arg& a5, 1980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a6, 1990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a7, 2000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a8, 2010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a9, 2020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a10, 2030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a11, 2040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a12, 2050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a13, 2060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a14, 2070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a15) const { 2080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* args[kMaxArgs]; 2090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n = 0; 2100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a0 == &no_more_args) goto done; args[n++] = &a0; 2110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a1 == &no_more_args) goto done; args[n++] = &a1; 2120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a2 == &no_more_args) goto done; args[n++] = &a2; 2130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a3 == &no_more_args) goto done; args[n++] = &a3; 2140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a4 == &no_more_args) goto done; args[n++] = &a4; 2150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a5 == &no_more_args) goto done; args[n++] = &a5; 2160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a6 == &no_more_args) goto done; args[n++] = &a6; 2170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a7 == &no_more_args) goto done; args[n++] = &a7; 2180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a8 == &no_more_args) goto done; args[n++] = &a8; 2190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a9 == &no_more_args) goto done; args[n++] = &a9; 2200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a10 == &no_more_args) goto done; args[n++] = &a10; 2210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a11 == &no_more_args) goto done; args[n++] = &a11; 2220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a12 == &no_more_args) goto done; args[n++] = &a12; 2230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a13 == &no_more_args) goto done; args[n++] = &a13; 2240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a14 == &no_more_args) goto done; args[n++] = &a14; 2250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a15 == &no_more_args) goto done; args[n++] = &a15; 2260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgdone: 2270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 2280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int consumed; 2290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 2300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); 2312007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org} 2320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 2330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::ConsumeFunctor::operator ()(StringPiece* input, 2340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& pattern, 2350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a0, 2360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a1, 2370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a2, 2380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a3, 2390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a4, 2400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a5, 2410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a6, 2420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a7, 2430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a8, 2440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a9, 2450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a10, 2460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a11, 2470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a12, 2480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a13, 2490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a14, 2500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a15) const { 2510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* args[kMaxArgs]; 2520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n = 0; 2530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a0 == &no_more_args) goto done; args[n++] = &a0; 254a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org if (&a1 == &no_more_args) goto done; args[n++] = &a1; 255a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org if (&a2 == &no_more_args) goto done; args[n++] = &a2; 2560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a3 == &no_more_args) goto done; args[n++] = &a3; 2570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a4 == &no_more_args) goto done; args[n++] = &a4; 2580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a5 == &no_more_args) goto done; args[n++] = &a5; 2590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a6 == &no_more_args) goto done; args[n++] = &a6; 2600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a7 == &no_more_args) goto done; args[n++] = &a7; 2610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a8 == &no_more_args) goto done; args[n++] = &a8; 2620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a9 == &no_more_args) goto done; args[n++] = &a9; 2630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a10 == &no_more_args) goto done; args[n++] = &a10; 2640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a11 == &no_more_args) goto done; args[n++] = &a11; 2650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a12 == &no_more_args) goto done; args[n++] = &a12; 2660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a13 == &no_more_args) goto done; args[n++] = &a13; 2670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a14 == &no_more_args) goto done; args[n++] = &a14; 2680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a15 == &no_more_args) goto done; args[n++] = &a15; 2690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgdone: 2700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 2710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int consumed; 2722007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org int vec[kVecSize]; 2732007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed, 2740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org args, n, vec, kVecSize)) { 2750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org input->remove_prefix(consumed); 2760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 2770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 2780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 2790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 2800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 2810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 2820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input, 2830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& pattern, 2840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a0, 2850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a1, 2860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a2, 2870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a3, 2880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a4, 2890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a5, 2900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a6, 2910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a7, 2920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a8, 2930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a9, 2940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a10, 2950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a11, 2960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a12, 2970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a13, 2980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a14, 2990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg& a15) const { 3000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* args[kMaxArgs]; 3012007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org int n = 0; 3020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a0 == &no_more_args) goto done; args[n++] = &a0; 3030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a1 == &no_more_args) goto done; args[n++] = &a1; 3040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a2 == &no_more_args) goto done; args[n++] = &a2; 3050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a3 == &no_more_args) goto done; args[n++] = &a3; 3060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a4 == &no_more_args) goto done; args[n++] = &a4; 3070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a5 == &no_more_args) goto done; args[n++] = &a5; 3080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a6 == &no_more_args) goto done; args[n++] = &a6; 3090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a7 == &no_more_args) goto done; args[n++] = &a7; 3100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a8 == &no_more_args) goto done; args[n++] = &a8; 3110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a9 == &no_more_args) goto done; args[n++] = &a9; 3120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a10 == &no_more_args) goto done; args[n++] = &a10; 3130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a11 == &no_more_args) goto done; args[n++] = &a11; 3140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a12 == &no_more_args) goto done; args[n++] = &a12; 3150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a13 == &no_more_args) goto done; args[n++] = &a13; 3160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a14 == &no_more_args) goto done; args[n++] = &a14; 3170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (&a15 == &no_more_args) goto done; args[n++] = &a15; 3180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgdone: 3190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int consumed; 3210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 3220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed, 3230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org args, n, vec, kVecSize)) { 3240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org input->remove_prefix(consumed); 3250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 3260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 3270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 3280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 3290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 3300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Replace(string *str, 3320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& pattern, 3330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const StringPiece& rewrite) { 3340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 3350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); 3360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (matches == 0) 3370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 3380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org string s; 3400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (!pattern.Rewrite(&s, rewrite, *str, vec, matches)) 3410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 3420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org assert(vec[0] >= 0); 3440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org assert(vec[1] >= 0); 3450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org str->replace(vec[0], vec[1] - vec[0], s); 3460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 3470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 3480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgint PCRE::GlobalReplace(string *str, 3500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& pattern, 3510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const StringPiece& rewrite) { 3520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int count = 0; 3530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 3540e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org string out; 3550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int start = 0; 3560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org bool last_match_was_empty_string = false; 3570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org for (; start <= str->length();) { 3590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // If the previous match was for the empty string, we shouldn't 3600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // just match again: we'll match in the same way and get an 3610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // infinite loop. Instead, we do the match in a special way: 3620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // anchored -- to force another try at the same position -- 3630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // and with a flag saying that this time, ignore empty matches. 3640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // If this special match returns, that means there's a non-empty 3652007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // match at this position as well, and we can continue. If not, 3662007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org // we do what perl does, and just advance by one. 3670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Notice that perl prints '@@@' for this; 3680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // perl -le '$_ = "aa"; s/b*|aa/@/g; print' 3690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int matches; 3700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (last_match_was_empty_string) { 3710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org matches = pattern.TryMatch(*str, start, ANCHOR_START, false, 3720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org vec, kVecSize); 3730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (matches <= 0) { 3740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (start < str->length()) 3750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org out.push_back((*str)[start]); 3760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org start++; 3770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org last_match_was_empty_string = false; 3780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org continue; 3790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 3800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 3810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org matches = pattern.TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); 3820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (matches <= 0) 3830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org break; 3840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 3850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int matchstart = vec[0], matchend = vec[1]; 386a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org assert(matchstart >= start); 3872007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org assert(matchend >= matchstart); 3882007187dab65bb5d6f602355216534d6dd4ceaf2mallinath@webrtc.org 389a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org out.append(*str, start, matchstart - start); 390a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org pattern.Rewrite(&out, rewrite, *str, vec, matches); 3910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org start = matchend; 3920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org count++; 3930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org last_match_was_empty_string = (matchstart == matchend); 3940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 3950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (count == 0) 3970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 3980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 3990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (start < str->length()) 4000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org out.append(*str, start, str->length() - start); 4010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org swap(out, *str); 4020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return count; 4030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 4040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Extract(const StringPiece &text, 4060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const PCRE& pattern, 4070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const StringPiece &rewrite, 4080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org string *out) { 4090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vec[kVecSize]; 4100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); 4110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (matches == 0) 4120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 4130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org out->clear(); 4140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return pattern.Rewrite(out, rewrite, text, vec, matches); 4150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 4160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgstring PCRE::QuoteMeta(const StringPiece& unquoted) { 4180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org string result; 4190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org result.reserve(unquoted.size() << 1); 4200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Escape any ascii character not in [A-Za-z_0-9]. 4220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // 4230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Note that it's legal to escape a character even if it has no 4240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // special meaning in a regular expression -- so this function does 4250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // that. (This also makes it identical to the perl function of the 4260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // same name except for the null-character special case; 4270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // see `perldoc -f quotemeta`.) 4280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org for (int ii = 0; ii < unquoted.length(); ++ii) { 4290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Note that using 'isalnum' here raises the benchmark time from 4300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // 32ns to 58ns: 4310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && 4320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && 4330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org (unquoted[ii] < '0' || unquoted[ii] > '9') && 4340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org unquoted[ii] != '_' && 4350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // If this is the part of a UTF8 or Latin1 character, we need 4360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // to copy this byte without escaping. Experimentally this is 4370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // what works correctly with the regexp library. 4380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org !(unquoted[ii] & 128)) { 4390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (unquoted[ii] == '\0') { // Special handling for null chars. 4400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Can't use "\\0" since the next character might be a digit. 4410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org result += "\\x00"; 4420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org continue; 4430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org result += '\\'; 4450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org result += unquoted[ii]; 4470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return result; 4500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 4510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org/***** Actual matching and rewriting code *****/ 4530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4540e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::HitLimit() { 4550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return hit_limit_; 4560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 4570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgvoid PCRE::ClearHitLimit() { 4590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org hit_limit_ = 0; 4600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 4610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgint PCRE::TryMatch(const StringPiece& text, 4630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int startpos, 4640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Anchor anchor, 4650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org bool empty_ok, 4660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int *vec, 4670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vecsize) const { 4680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; 4690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (re == NULL) { 4700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) << "Matching against invalid re: " << *error_; 4710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 4720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int match_limit = match_limit_; 4750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (match_limit <= 0) { 4760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org match_limit = FLAGS_regexp_match_limit; 4770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int stack_limit = stack_limit_; 4800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (stack_limit <= 0) { 4810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org stack_limit = FLAGS_regexp_stack_limit; 4820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org pcre_extra extra = { 0 }; 4850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (match_limit > 0) { 4860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org extra.flags |= PCRE_EXTRA_MATCH_LIMIT; 4870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org extra.match_limit = match_limit; 4880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (stack_limit > 0) { 4900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 4910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org extra.match_limit_recursion = stack_limit / kPCREFrameSize; 4920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 4930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 4940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int options = 0; 4950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (anchor != UNANCHORED) 4960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org options |= PCRE_ANCHORED; 4970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (!empty_ok) 4980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org options |= PCRE_NOTEMPTY; 4990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int rc = pcre_exec(re, // The regular expression object 5010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org &extra, 5020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org (text.data() == NULL) ? "" : text.data(), 5030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org text.size(), 5040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org startpos, 5050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org options, 5060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org vec, 5070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org vecsize); 5080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Handle errors 5100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (rc == 0) { 5110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // pcre_exec() returns 0 as a special case when the number of 5120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // capturing subpatterns exceeds the size of the vector. 5130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // When this happens, there is a match and the output vector 5140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // is filled, but we miss out on the positions of the extra subpatterns. 5150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org rc = vecsize / 2; 5160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else if (rc < 0) { 5170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org switch (rc) { 5180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org case PCRE_ERROR_NOMATCH: 5190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 5200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org case PCRE_ERROR_MATCHLIMIT: 5210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // Writing to hit_limit is not safe if multiple threads 5220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // are using the PCRE, but the flag is only intended 5230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // for use by unit tests anyway, so we let it go. 5240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org hit_limit_ = true; 5250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(WARNING) << "Exceeded match limit of " << match_limit 5260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " when matching '" << pattern_ << "'" 5270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " against text that is " << text.size() << " bytes."; 5280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 5290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org case PCRE_ERROR_RECURSIONLIMIT: 5300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // See comment about hit_limit above. 5310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org hit_limit_ = true; 5320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit 5330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " when matching '" << pattern_ << "'" 5340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " against text that is " << text.size() << " bytes."; 5350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 5360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org default: 5370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // There are other return codes from pcre.h : 5380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_NULL (-2) 5390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_BADOPTION (-3) 5400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_BADMAGIC (-4) 5410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_UNKNOWN_NODE (-5) 5420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_NOMEMORY (-6) 5430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE_ERROR_NOSUBSTRING (-7) 5440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // ... 5450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) << "Unexpected return code: " << rc 5460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " when matching '" << pattern_ << "'" 5470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << ", re=" << re 5480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << ", text=" << text 5490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << ", vec=" << vec 5500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << ", vecsize=" << vecsize; 5510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return 0; 5520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5540e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return rc; 5560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 5570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::DoMatchImpl(const StringPiece& text, 5590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Anchor anchor, 5600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int* consumed, 5610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* const* args, 5620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n, 5630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int* vec, 5640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int vecsize) const { 5650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org assert((1 + n) * 3 <= vecsize); // results + PCRE workspace 566e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org int matches = TryMatch(text, 0, anchor, true, vec, vecsize); 5670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org assert(matches >= 0); // TryMatch never returns negatives 568e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org if (matches == 0) 5690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 5700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 571e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org *consumed = vec[1]; 5720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n == 0 || args == NULL) { 5740e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // We are not interested in results 5750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 5760e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (NumberOfCapturingGroups() < n) { 5780e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // PCRE has fewer capturing groups than number of arg pointers passed in 5790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 5800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // If we got here, we must have matched the whole pattern. 5830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // We do not need (can not do) any more checks on the value of 'matches' here 5840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // -- see the comment for TryMatch. 5850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org for (int i = 0; i < n; i++) { 5860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const int start = vec[2*(i+1)]; 5870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const int limit = vec[2*(i+1)+1]; 5880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (!args[i]->Parse(text.data() + start, limit-start)) { 5890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // TODO: Should we indicate what the error was? 5900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 5910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5920e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 5930e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 5950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 5960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 5970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::DoMatch(const StringPiece& text, 5980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org Anchor anchor, 5990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int* consumed, 6000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const Arg* const args[], 6010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n) const { 6020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org assert(n >= 0); 6030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org size_t const vecsize = (1 + n) * 3; // results + PCRE workspace 6040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // (as for kVecSize) 6050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int *vec = new int[vecsize]; 6060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize); 6070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org delete[] vec; 6080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return b; 6090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 6100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Rewrite(string *out, const StringPiece &rewrite, 6120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org const StringPiece &text, int *vec, int veclen) const { 6130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int number_of_capturing_groups = NumberOfCapturingGroups(); 6140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org for (const char *s = rewrite.data(), *end = s + rewrite.size(); 6150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org s < end; s++) { 6160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int c = *s; 6170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (c == '\\') { 6180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org c = *++s; 6190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (isdigit(c)) { 6200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n = (c - '0'); 6210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n >= veclen) { 6220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n <= number_of_capturing_groups) { 6230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // unmatched optional capturing group. treat 6240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // its value as empty string; i.e., nothing to append. 6250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 6260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) << "requested group " << n 6270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org << " in regexp " << rewrite.data(); 6280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 6290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int start = vec[2 * n]; 6320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (start >= 0) 6330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org out->append(text.data() + start, vec[2 * n + 1] - start); 6340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else if (c == '\\') { 635a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org out->push_back('\\'); 6360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 6370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data(); 6380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 639a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org } 6400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 6410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org out->push_back(c); 642a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org } 6430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 6450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 6460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const { 648a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org int max_token = -1; 6490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org for (const char *s = rewrite.data(), *end = s + rewrite.size(); 6500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org s < end; s++) { 6510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int c = *s; 6520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (c != '\\') { 6530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org continue; 6540e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (++s == end) { 6560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org *error = "Rewrite schema error: '\\' not allowed at end."; 6570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 6580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org c = *s; 6600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (c == '\\') { 6610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org continue; 6620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (!isdigit(c)) { 6640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org *error = "Rewrite schema error: " 6650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org "'\\' must be followed by a digit or '\\'."; 6660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return false; 6670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n = (c - '0'); 6690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (max_token < n) { 6700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org max_token = n; 6710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 674e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org if (max_token > NumberOfCapturingGroups()) { 6750e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org SStringPrintf(error, "Rewrite schema requests %d matches, " 676e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org "but the regexp only has %d parenthesized subexpressions.", 6770e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org max_token, NumberOfCapturingGroups()); 678e560834da4ee5a5f38a96a8cb9290c5ce1096989mallinath@webrtc.org return false; 6790e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 6800e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 6810e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 6820e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6830e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6840e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Return the number of capturing subpatterns, or -1 if the 6850e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// regexp wasn't valid on construction. 6860e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgint PCRE::NumberOfCapturingGroups() const { 6870e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (re_partial_ == NULL) return -1; 6880e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6890e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int result; 6900e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org CHECK(pcre_fullinfo(re_partial_, // The regular expression object 6910e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org NULL, // We did not study the pattern 692a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org PCRE_INFO_CAPTURECOUNT, 693a487db2aeda23ade81f0b2e5fd4d50f874d06a9csergeyu@chromium.org &result) == 0); 6940e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return result; 6950e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 6960e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6970e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 6980e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org/***** Parsers for various types *****/ 6990e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7000e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_null(const char* str, int n, void* dest) { 7010e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // We fail if somebody asked us to store into a non-NULL void* pointer 7020e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return (dest == NULL); 7030e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7040e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7050e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_string(const char* str, int n, void* dest) { 7060e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (dest == NULL) return true; 7070e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org reinterpret_cast<string*>(dest)->assign(str, n); 7080e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 7090e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7100e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7110e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_stringpiece(const char* str, int n, void* dest) { 7120e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (dest == NULL) return true; 7130e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org reinterpret_cast<StringPiece*>(dest)->set(str, n); 7140e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 7150e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7160e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7170e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_char(const char* str, int n, void* dest) { 7180e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n != 1) return false; 7190e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (dest == NULL) return true; 7200e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org *(reinterpret_cast<char*>(dest)) = str[0]; 7210e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 7220e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7230e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7240e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_uchar(const char* str, int n, void* dest) { 7250e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n != 1) return false; 7260e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (dest == NULL) return true; 7270e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org *(reinterpret_cast<unsigned char*>(dest)) = str[0]; 7280e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return true; 7290e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7300e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7310e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Largest number spec that we are willing to parse 7320e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgstatic const int kMaxNumberLength = 32; 7330e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7340e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1 7350e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// PCREQUIPCRES "n > 0" 7360e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Copies "str" into "buf" and null-terminates if necessary. 7370e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// Returns one of: 7380e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// a. "str" if no termination is needed 7390e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// b. "buf" if the string was copied and null-terminated 7400e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org// c. "" if the input was invalid and has no hope of being parsed 7410e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgstatic const char* TerminateNumber(char* buf, const char* str, int n) { 7420e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if ((n > 0) && isspace(*str)) { 7430e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // We are less forgiving than the strtoxxx() routines and do not 7440e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // allow leading spaces. 7450e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return ""; 7460e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 7470e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7480e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // See if the character right after the input text may potentially 7490e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // look like a digit. 7500e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (isdigit(str[n]) || 7510e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org ((str[n] >= 'a') && (str[n] <= 'f')) || 7520e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org ((str[n] >= 'A') && (str[n] <= 'F'))) { 7530e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n > kMaxNumberLength) return ""; // Input too big to be a valid number 7540e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org memcpy(buf, str, n); 7550e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org buf[n] = '\0'; 7560e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return buf; 7570e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } else { 7580e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org // We can parse right out of the supplied string, so return it. 7590e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org return str; 7600e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org } 7610e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org} 7620e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org 7630e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.orgbool PCRE::Arg::parse_long_radix(const char* str, 7640e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int n, 7650e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org void* dest, 7660e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org int radix) { 7670e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (n == 0) return false; 7680e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org char buf[kMaxNumberLength+1]; 7690e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org str = TerminateNumber(buf, str, n); 7700e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org char* end; 7710e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org errno = 0; 7720e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org long r = strtol(str, &end, radix); 7730e118e7129884fbea117e78d6f2068139a414dbhenrike@webrtc.org if (end != str + n) return false; // Leftover junk 774 if (errno) return false; 775 if (dest == NULL) return true; 776 *(reinterpret_cast<long*>(dest)) = r; 777 return true; 778} 779 780bool PCRE::Arg::parse_ulong_radix(const char* str, 781 int n, 782 void* dest, 783 int radix) { 784 if (n == 0) return false; 785 char buf[kMaxNumberLength+1]; 786 str = TerminateNumber(buf, str, n); 787 if (str[0] == '-') { 788 // strtoul() will silently accept negative numbers and parse 789 // them. This module is more strict and treats them as errors. 790 return false; 791 } 792 793 char* end; 794 errno = 0; 795 unsigned long r = strtoul(str, &end, radix); 796 if (end != str + n) return false; // Leftover junk 797 if (errno) return false; 798 if (dest == NULL) return true; 799 *(reinterpret_cast<unsigned long*>(dest)) = r; 800 return true; 801} 802 803bool PCRE::Arg::parse_short_radix(const char* str, 804 int n, 805 void* dest, 806 int radix) { 807 long r; 808 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 809 if ((short)r != r) return false; // Out of range 810 if (dest == NULL) return true; 811 *(reinterpret_cast<short*>(dest)) = r; 812 return true; 813} 814 815bool PCRE::Arg::parse_ushort_radix(const char* str, 816 int n, 817 void* dest, 818 int radix) { 819 unsigned long r; 820 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 821 if ((ushort)r != r) return false; // Out of range 822 if (dest == NULL) return true; 823 *(reinterpret_cast<unsigned short*>(dest)) = r; 824 return true; 825} 826 827bool PCRE::Arg::parse_int_radix(const char* str, 828 int n, 829 void* dest, 830 int radix) { 831 long r; 832 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 833 if ((int)r != r) return false; // Out of range 834 if (dest == NULL) return true; 835 *(reinterpret_cast<int*>(dest)) = r; 836 return true; 837} 838 839bool PCRE::Arg::parse_uint_radix(const char* str, 840 int n, 841 void* dest, 842 int radix) { 843 unsigned long r; 844 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 845 if ((uint)r != r) return false; // Out of range 846 if (dest == NULL) return true; 847 *(reinterpret_cast<unsigned int*>(dest)) = r; 848 return true; 849} 850 851bool PCRE::Arg::parse_longlong_radix(const char* str, 852 int n, 853 void* dest, 854 int radix) { 855 if (n == 0) return false; 856 char buf[kMaxNumberLength+1]; 857 str = TerminateNumber(buf, str, n); 858 char* end; 859 errno = 0; 860 int64 r = strtoll(str, &end, radix); 861 if (end != str + n) return false; // Leftover junk 862 if (errno) return false; 863 if (dest == NULL) return true; 864 *(reinterpret_cast<int64*>(dest)) = r; 865 return true; 866} 867 868bool PCRE::Arg::parse_ulonglong_radix(const char* str, 869 int n, 870 void* dest, 871 int radix) { 872 if (n == 0) return false; 873 char buf[kMaxNumberLength+1]; 874 str = TerminateNumber(buf, str, n); 875 if (str[0] == '-') { 876 // strtoull() will silently accept negative numbers and parse 877 // them. This module is more strict and treats them as errors. 878 return false; 879 } 880 char* end; 881 errno = 0; 882 uint64 r = strtoull(str, &end, radix); 883 if (end != str + n) return false; // Leftover junk 884 if (errno) return false; 885 if (dest == NULL) return true; 886 *(reinterpret_cast<uint64*>(dest)) = r; 887 return true; 888} 889 890bool PCRE::Arg::parse_double(const char* str, int n, void* dest) { 891 if (n == 0) return false; 892 static const int kMaxLength = 200; 893 char buf[kMaxLength]; 894 if (n >= kMaxLength) return false; 895 memcpy(buf, str, n); 896 buf[n] = '\0'; 897 errno = 0; 898 char* end; 899 double r = strtod(buf, &end); 900 if (end != buf + n) { 901#ifdef COMPILER_MSVC 902 // Microsoft's strtod() doesn't handle inf and nan, so we have to 903 // handle it explicitly. Speed is not important here because this 904 // code is only called in unit tests. 905 bool pos = true; 906 const char* i = buf; 907 if ('-' == *i) { 908 pos = false; 909 ++i; 910 } else if ('+' == *i) { 911 ++i; 912 } 913 if (0 == stricmp(i, "inf") || 0 == stricmp(i, "infinity")) { 914 r = numeric_limits<double>::infinity(); 915 if (!pos) 916 r = -r; 917 } else if (0 == stricmp(i, "nan")) { 918 r = numeric_limits<double>::quiet_NaN(); 919 } else { 920 return false; 921 } 922#else 923 return false; // Leftover junk 924#endif 925 } 926 if (errno) return false; 927 if (dest == NULL) return true; 928 *(reinterpret_cast<double*>(dest)) = r; 929 return true; 930} 931 932bool PCRE::Arg::parse_float(const char* str, int n, void* dest) { 933 double r; 934 if (!parse_double(str, n, &r)) return false; 935 if (dest == NULL) return true; 936 *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); 937 return true; 938} 939 940 941#define DEFINE_INTEGER_PARSERS(name) \ 942 bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) { \ 943 return parse_##name##_radix(str, n, dest, 10); \ 944 } \ 945 bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ 946 return parse_##name##_radix(str, n, dest, 16); \ 947 } \ 948 bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ 949 return parse_##name##_radix(str, n, dest, 8); \ 950 } \ 951 bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ 952 return parse_##name##_radix(str, n, dest, 0); \ 953 } 954 955DEFINE_INTEGER_PARSERS(short); 956DEFINE_INTEGER_PARSERS(ushort); 957DEFINE_INTEGER_PARSERS(int); 958DEFINE_INTEGER_PARSERS(uint); 959DEFINE_INTEGER_PARSERS(long); 960DEFINE_INTEGER_PARSERS(ulong); 961DEFINE_INTEGER_PARSERS(longlong); 962DEFINE_INTEGER_PARSERS(ulonglong); 963 964#undef DEFINE_INTEGER_PARSERS 965 966} // namespace re2 967