// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_RE2_H
#define RE2_RE2_H

// C++ interface to the re2 regular-expression library.
// RE2 supports Perl-style regular expressions (with extensions like
// \d, \w, \s, ...).
//
// -----------------------------------------------------------------------
// REGEXP SYNTAX:
//
// This module uses the re2 library and hence supports
// its syntax for regular expressions, which is similar to Perl's with
// some of the more complicated things thrown away.  In particular,
// backreferences and generalized assertions are not available, nor is \Z.
//
// See http://code.google.com/p/re2/wiki/Syntax for the syntax
// supported by RE2, and a comparison with PCRE and PERL regexps.
//
// For those not familiar with Perl's regular expressions,
// here are some examples of the most commonly used extensions:
//
//   "hello (\\w+) world"  -- \w matches a "word" character
//   "version (\\d+)"      -- \d matches a digit
//   "hello\\s+world"      -- \s matches any whitespace character
//   "\\b(\\w+)\\b"        -- \b matches the empty string at a word boundary
//   "(?i)hello"           -- (?i) turns on case-insensitive matching
//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
// The "FullMatch" operation checks that supplied text matches a
// supplied pattern exactly.
//
// Example: successful match
//    CHECK(RE2::FullMatch("hello", "h.*o"));
//
// Example: unsuccessful match (requires full match):
//    CHECK(!RE2::FullMatch("hello", "e"));
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
//
// By default, the pattern and input text are interpreted as UTF-8.
// The RE2::Latin1 option causes them to be interpreted as Latin-1.
//
// Example:
//    CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
//    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
//
// -----------------------------------------------------------------------
// MATCHING WITH SUB-STRING EXTRACTION:
//
// You can supply extra pointer arguments to extract matched subpieces.
//
// Example: extracts "ruby" into "s" and 1234 into "i"
//    int i;
//    string s;
//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: fails because string cannot be stored in integer
//    CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
//
// Example: fails because there aren't enough sub-patterns:
//    CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
//
// Example: does not try to extract any extra sub-patterns
//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
//
// Example: does not try to extract into NULL
//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
//
// Example: integer overflow causes failure
//    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
//
// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
// This may get a little faster in the future, but right now is slower
// than PCRE.  On the other hand, failed matches run *very* fast (faster
// than PCRE), as do matches without substring extraction.
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
//
// You can use the "PartialMatch" operation when you want the pattern
// to match any substring of the text.
//
// Example: simple search for a string:
//      CHECK(RE2::PartialMatch("hello", "ell"));
//
// Example: find first number in a string
//      int number;
//      CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
//      CHECK_EQ(number, 100);
//
// -----------------------------------------------------------------------
// PRE-COMPILED REGULAR EXPRESSIONS
//
// RE2 makes it easy to use any string as a regular expression, without
// requiring a separate compilation step.
//
// If speed is of the essence, you can create a pre-compiled "RE2"
// object from the pattern and use it multiple times.  If you do so,
// you can typically parse text faster than with sscanf.
//
// Example: precompile pattern for faster matching:
//    RE2 pattern("h.*o");
//    while (ReadLine(&str)) {
//      if (RE2::FullMatch(str, pattern)) ...;
//    }
//
// -----------------------------------------------------------------------
// SCANNING TEXT INCREMENTALLY
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
// them as they match.  This requires use of the "StringPiece" type,
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
//      string contents = ...;          // Fill string somehow
//      StringPiece input(contents);    // Wrap a StringPiece around it
//
//      string var;
//      int value;
//      while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
//        ...;
//      }
//
// Each successful call to "Consume" will set "var/value", and also
// advance "input" so it points past the matched text.  Note that if the
// regular expression matches an empty string, input will advance
// by 0 bytes.  If the regular expression being used might match
// an empty string, the loop body must check for this case and either
// advance the string or break out of the loop.
//
// The "FindAndConsume" operation is similar to "Consume" but does not
// anchor your match at the beginning of the string.  For example, you
// could extract all words from a string by repeatedly calling
//     RE2::FindAndConsume(&input, "(\\w+)", &word)
//
// -----------------------------------------------------------------------
// USING VARIABLE NUMBER OF ARGUMENTS
//
// The above operations require you to know the number of arguments
// when you write the code.  This is not always possible or easy (for
// example, the regular expression may be calculated at run time).
// You can use the "N" version of the operations when the number of
// match arguments is determined at run time.
//
// Example:
//   const RE2::Arg* args[10];
//   int n;
//   // ... populate args with pointers to RE2::Arg values ...
//   // ... set n to the number of RE2::Arg objects ...
//   bool match = RE2::FullMatchN(input, pattern, args, n);
//
// The last statement is equivalent to
//
//   bool match = RE2::FullMatch(input, pattern,
//                               *args[0], *args[1], ..., *args[n - 1]);
//
// -----------------------------------------------------------------------
// PARSING HEX/OCTAL/C-RADIX NUMBERS
//
// By default, if you pass a pointer to a numeric value, the
// corresponding text is interpreted as a base-10 number.  You can
// instead wrap the pointer with a call to one of the operators Hex(),
// Octal(), or CRadix() to interpret the text in another base.  The
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
// prefixes, but defaults to base-10.
//
// Example:
//   int a, b, c, d;
//   CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
//         RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));
// will leave 64 in a, b, c, and d.


#include <stdint.h>
#include <map>
#include <string>
#include "re2/stringpiece.h"
#include "re2/variadic_function.h"

namespace re2 {

using std::string;
using std::map;
class Mutex;
class Prog;
class Regexp;

// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state.  It indicates to the reader that it is legal to
// declare a static instance of the class, provided the constructor is given
// the LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
// static variable that has a constructor or a destructor because invocation
// order is undefined.  However, IF the type can be initialized by filling with
// zeroes (which the loader does for static variables), AND the type's
// destructor does nothing to the storage, then a constructor for static
// initialization can be declared as
//       explicit MyClass(LinkerInitialized x) {}
// and invoked as
//       static MyClass my_variable_name(LINKER_INITIALIZED);
enum LinkerInitialized { LINKER_INITIALIZED };

// Interface for regular expression matching.  Also corresponds to a
// pre-compiled regular expression.
An "RE2" object is safe for 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// concurrent use by multiple threads. 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class RE2 { 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We convert user-passed pointers into special Arg objects 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class Arg; 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class Options; 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Defined in set.h. 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class Set; 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum ErrorCode { 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NoError = 0, 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unexpected error 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorInternal, 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parse errors 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadEscape, // bad escape sequence 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadCharClass, // bad character class 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadCharRange, // bad character class range 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorMissingBracket, // missing closing ] 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorMissingParen, // missing closing ) 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
ErrorTrailingBackslash, // trailing \ at end of regexp 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorRepeatArgument, // repeat argument missing, e.g. "*" 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorRepeatSize, // bad repetition argument 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorRepeatOp, // bad repetition operator 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadPerlOp, // bad perl operator 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadUTF8, // invalid UTF-8 in regexp 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorBadNamedCapture, // bad named capture group 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorPatternTooLarge, // pattern too large (compile failed) 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Predefined common options. 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If you need more complicated things, instantiate 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // an Option class, possibly passing one of these to 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the Option constructor, change the settings, and pass that 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Option class to the RE2 constructor. 
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum CannedOptions { 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DefaultOptions = 0, 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Latin1, // treat input as Latin-1 (default UTF-8) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) POSIX_SYNTAX, // POSIX syntax, leftmost-longest match 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Quiet // do not log about regexp parse errors 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Need to have the const char* and const string& forms for implicit 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // conversions when passing string literals to FullMatch and PartialMatch. 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Otherwise the StringPiece form would be sufficient. 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef SWIG 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2(const char* pattern); 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2(const string& pattern); 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2(const StringPiece& pattern); 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2(const StringPiece& pattern, const Options& option); 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~RE2(); 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns whether RE2 was created properly. 
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool ok() const { return error_code() == NoError; } 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The string specification for this RE2. E.g. 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // RE2 re("ab*c?d+"); 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // re.pattern(); // "ab*c?d+" 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string& pattern() const { return pattern_; } 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If RE2 could not be created properly, returns an error string. 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Else returns the empty string. 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string& error() const { return *error_; } 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If RE2 could not be created properly, returns an error code. 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Else returns RE2::NoError (== 0). 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ErrorCode error_code() const { return error_code_; } 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If RE2 could not be created properly, returns the offending 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // portion of the regexp. 
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string& error_arg() const { return error_arg_; } 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the program size, a very approximate measure of a regexp's "cost". 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Larger numbers are more expensive than smaller numbers. 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ProgramSize() const; 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the underlying Regexp; not for general use. 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns entire_regexp_ so that callers don't need 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to know about prefix_ and prefix_foldcase_. 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Regexp* Regexp() const { return entire_regexp_; } 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /***** The useful part: the matching interface *****/ 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Matches "text" against "pattern". If pointer arguments are 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // supplied, copies matched sub-patterns into them. 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // You can pass in a "const char*" or a "string" for "text". 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // You can pass in a "const char*" or a "string" or a "RE2" for "pattern". 
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The provided pointer arguments can be pointers to any scalar numeric 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // type, or one of: 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string (matched piece is copied to string) 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // StringPiece (StringPiece is mutated to point to matched piece) 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // T (where "bool T::ParseFrom(const char*, int)" exists) 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (void*)NULL (the corresponding matched sub-pattern is not copied) 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true iff all of the following conditions are satisfied: 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a. "text" matches "pattern" exactly 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // b. The number of matched sub-patterns is >= number of supplied pointers 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // c. The "i"th argument has a suitable type for holding the 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string captured as the "i"th sub-pattern. If you pass in 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // NULL for the "i"th argument, or pass fewer arguments than 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // number of sub-patterns, "i"th captured sub-pattern is 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ignored. 
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CAVEAT: An optional sub-pattern that does not exist in the 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // matched string is assigned the empty string. Therefore, the 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // following will return false (because the empty string is not a 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // valid number): 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // int number; 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool FullMatchN(const StringPiece& text, const RE2& re, 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int argc); 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const VariadicFunction2< 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool, const StringPiece&, const RE2&, Arg, RE2::FullMatchN> FullMatch; 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Exactly like FullMatch(), except that "pattern" is allowed to match 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a substring of "text". 
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool PartialMatchN(const StringPiece& text, const RE2& re, // 3..16 args 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int argc); 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const VariadicFunction2< 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool, const StringPiece&, const RE2&, Arg, RE2::PartialMatchN> PartialMatch; 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Like FullMatch() and PartialMatch(), except that pattern has to 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match a prefix of "text", and "input" is advanced past the matched 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // text. Note: "input" is modified iff this routine returns true. 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool ConsumeN(StringPiece* input, const RE2& pattern, // 3..16 args 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int argc); 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const VariadicFunction2< 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool, StringPiece*, const RE2&, Arg, RE2::ConsumeN> Consume; 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Like Consume(..), but does not anchor the match at the beginning of the 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string. That is, "pattern" need not start its match at the beginning of 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "input". 
For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // word in "s" and stores it in "word". 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool FindAndConsumeN(StringPiece* input, const RE2& pattern, 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], int argc); 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const VariadicFunction2< 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool, StringPiece*, const RE2&, Arg, RE2::FindAndConsumeN> FindAndConsume; 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Replace the first match of "pattern" in "str" with "rewrite". 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Within "rewrite", backslash-escaped digits (\1 to \9) can be 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // used to insert text matching corresponding parenthesized group 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // from the pattern. \0 in "rewrite" refers to the entire matching 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // text. 
E.g., 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string s = "yabba dabba doo"; 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CHECK(RE2::Replace(&s, "b+", "d")); 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will leave "s" containing "yada dabba doo" 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if the pattern matches and a replacement occurs, 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // false otherwise. 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool Replace(string *str, 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& pattern, 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& rewrite); 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Like Replace(), except replaces successive non-overlapping occurrences 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of the pattern in the string with the rewrite. E.g. 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string s = "yabba dabba doo"; 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CHECK(RE2::GlobalReplace(&s, "b+", "d")); 3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will leave "s" containing "yada dada doo" 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Replacements are not subject to re-matching. 
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Because GlobalReplace only replaces non-overlapping matches, 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // replacing "ana" within "banana" makes only one replacement, not two. 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the number of replacements made. 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static int GlobalReplace(string *str, 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& pattern, 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& rewrite); 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Like Replace, except that if the pattern matches, "rewrite" 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is copied into "out" with substitutions. The non-matching 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // portions of "text" are ignored. 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true iff a match occurred and the extraction happened 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // successfully; if no match occurs, the string is left unaffected. 
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool Extract(const StringPiece &text, 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2& pattern, 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece &rewrite, 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string *out); 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Escapes all potentially meaningful regexp characters in 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 'unquoted'. The returned string, used as a regular expression, 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will exactly match the original string. For example, 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 1.5-2.0? 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // may become: 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 1\.5\-2\.0\? 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static string QuoteMeta(const StringPiece& unquoted); 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Computes range for any strings matching regexp. The min and max can in 4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // some cases be arbitrarily precise, so the caller gets to specify the 4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // maximum desired length of string returned. 
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string s that is an anchored match for this regexp satisfies 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // min <= s && s <= max. 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that PossibleMatchRange() will only consider the first copy of an 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // infinitely repeated element (i.e., any regexp element followed by a '*' or 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // '+' operator). Regexps with "{N}" constructions are not affected, as those 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // do not compile down to infinite repetitions. 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true on success, false on error. 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool PossibleMatchRange(string* min, string* max, int maxlen) const; 4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Generic matching interface 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Type of match. 
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum Anchor { 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UNANCHORED, // No anchoring 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ANCHOR_START, // Anchor at start only 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ANCHOR_BOTH, // Anchor at start and end 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the number of capturing subpatterns, or -1 if the 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // regexp wasn't valid on construction. The overall match ($0) 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // does not count: if the regexp is "(a)(b)", returns 2. 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int NumberOfCapturingGroups() const; 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return a map from names to capturing indices. 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The map records the index of the leftmost group 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with the given name. 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Only valid until the re is deleted. 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const map<string, int>& NamedCapturingGroups() const; 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return a map from capturing indices to names. 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The map has no entries for unnamed groups. 
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Only valid until the re is deleted. 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const map<int, string>& CapturingGroupNames() const; 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // General matching routine. 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Match against text starting at offset startpos 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and stopping the search at offset endpos. 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if match found, false if not. 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // On a successful match, fills in match[] (up to nmatch entries) 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with information about submatches. 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // setting match[0] = "barbaz", match[1] = NULL, match[2] = "bar", 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match[3] = NULL, ..., up to match[nmatch-1] = NULL. 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Don't ask for more match information than you will use: 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // runs much faster with nmatch == 1 than nmatch > 1, and 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // runs even faster if nmatch == 0. 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Doesn't make sense to use nmatch > 1 + NumberOfCapturingGroups(), 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but will be handled correctly. 
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Passing text == StringPiece(NULL, 0) will be handled like any other 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // empty string, but note that on return, it will not be possible to tell 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // whether submatch i matched the empty string or did not match: 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // either way, match[i] == NULL. 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Match(const StringPiece& text, 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int startpos, 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int endpos, 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece *match, 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nmatch) const; 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check that the given rewrite string is suitable for use with this 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // regular expression. It checks that: 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * The regular expression has enough parenthesized subexpressions 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to satisfy all of the \N tokens in rewrite 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * The rewrite string doesn't have any syntax errors. E.g., 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // '\' followed by anything other than a digit or '\'. 
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // A true return value guarantees that Replace() and Extract() won't 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // fail because of a bad rewrite string. 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool CheckRewriteString(const StringPiece& rewrite, string* error) const; 4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the maximum submatch needed for the rewrite to be done by 4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. 4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static int MaxSubmatch(const StringPiece& rewrite); 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Append the "rewrite" string, with backslash substitutions from "vec", 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to string "out". 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true on success. This method can fail because of a malformed 4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // rewrite string. CheckRewriteString guarantees that the rewrite will 4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be successful. 
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Rewrite(string *out, 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece &rewrite, 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece* vec, 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int veclen) const; 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constructor options 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class Options { 5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The options are (defaults in parentheses): 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // posix_syntax (false) restrict regexps to POSIX egrep syntax 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // longest_match (false) search for longest match, not first match 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // log_errors (true) log syntax and execution errors to ERROR 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // max_mem (see below) approx. 
max memory footprint of RE2 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // literal (false) interpret string as literal, not regexp 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // never_nl (false) never match \n, even if it is in regexp 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // never_capture (false) parse all parens as non-capturing 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // case_sensitive (true) match is case-sensitive (regexp can override 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with (?i) unless in posix_syntax mode) 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The following options are only consulted when posix_syntax == true. 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (When posix_syntax == false these features are always enabled and 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // cannot be turned off.) 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // perl_classes (false) allow Perl's \d \s \w \D \S \W 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // word_boundary (false) allow Perl's \b \B (word boundary and not) 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // one_line (false) ^ and $ only match beginning and end of text 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The max_mem option controls how much memory can be used 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to hold the compiled form of the regexp (the Prog) and 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // its cached DFA graphs. 
Code Search placed limits on the number 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of Prog instructions and DFA states: 10,000 for both. 5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // In RE2, those limits would translate to about 240 KB per Prog 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // better job of keeping them small than Code Search did). 5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Each RE2 has two Progs (one forward, one reverse), and each Prog 5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // can have two DFAs (one first match, one longest match). 5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // That makes 4 DFAs: 5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // forward, first-match - used for UNANCHORED or ANCHOR_START searches 5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // if opt.longest_match() == false 5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // forward, longest-match - used for all ANCHOR_BOTH searches, 5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and the other two kinds if 5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // opt.longest_match() == true 5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // reverse, first-match - never used 5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // reverse, longest-match - used as second phase for unanchored searches 5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The RE2 memory budget is statically divided between the two 
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Progs and then the DFAs: two thirds to the forward Prog 5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and one third to the reverse Prog. The forward Prog gives half 5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of what it has left over to each of its DFAs. The reverse Prog 5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // gives it all to its longest-match DFA. 5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Once a DFA fills its budget, it flushes its cache and starts over. 5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If this happens too often, RE2 falls back on the NFA implementation. 5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For now, make the default budget something close to Code Search. 
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef WIN32 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int kDefaultMaxMem = 8<<20; 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum Encoding { 5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EncodingUTF8 = 1, 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EncodingLatin1 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Options(); 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /*implicit*/ Options(CannedOptions); 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Encoding encoding() const { return encoding_; } 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_encoding(Encoding encoding) { encoding_ = encoding; } 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Legacy interface to encoding. 5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(rsc): Remove once clients have been converted. 
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool utf8() const { return encoding_ == EncodingUTF8; } 5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_utf8(bool b) { 5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b) { 5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding_ = EncodingUTF8; 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding_ = EncodingLatin1; 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool posix_syntax() const { return posix_syntax_; } 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_posix_syntax(bool b) { posix_syntax_ = b; } 5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest_match() const { return longest_match_; } 5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_longest_match(bool b) { longest_match_ = b; } 5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool log_errors() const { return log_errors_; } 5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_log_errors(bool b) { log_errors_ = b; } 5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int max_mem() const { return max_mem_; } 5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_max_mem(int m) { max_mem_ = m; } 5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool literal() 
const { return literal_; } 5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_literal(bool b) { literal_ = b; } 5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool never_nl() const { return never_nl_; } 5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_never_nl(bool b) { never_nl_ = b; } 5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool never_capture() const { return never_capture_; } 6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_never_capture(bool b) { never_capture_ = b; } 6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool case_sensitive() const { return case_sensitive_; } 6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_case_sensitive(bool b) { case_sensitive_ = b; } 6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool perl_classes() const { return perl_classes_; } 6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_perl_classes(bool b) { perl_classes_ = b; } 6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool word_boundary() const { return word_boundary_; } 6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_word_boundary(bool b) { word_boundary_ = b; } 6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool one_line() const { return one_line_; } 6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_one_line(bool b) { one_line_ = b; } 6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void Copy(const Options& src) { 6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding_ = src.encoding_; 6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) posix_syntax_ = src.posix_syntax_; 6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_match_ = src.longest_match_; 6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) log_errors_ = src.log_errors_; 6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_mem_ = src.max_mem_; 6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) literal_ = src.literal_; 6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_nl_ = src.never_nl_; 6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) never_capture_ = src.never_capture_; 6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case_sensitive_ = src.case_sensitive_; 6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) perl_classes_ = src.perl_classes_; 6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_boundary_ = src.word_boundary_; 6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) one_line_ = src.one_line_; 6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ParseFlags() const; 6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Encoding encoding_; 6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool posix_syntax_; 6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest_match_; 6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool log_errors_; 
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int64_t max_mem_; 6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool literal_; 6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool never_nl_; 6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool never_capture_; 6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool case_sensitive_; 6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool perl_classes_; 6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool word_boundary_; 6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool one_line_; 6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) //DISALLOW_EVIL_CONSTRUCTORS(Options); 6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Options(const Options&); 6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void operator=(const Options&); 6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the options set in the constructor. 6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Options& options() const { return options_; }; 6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Argument converters; see below. 
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(short* x); 6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(unsigned short* x); 6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(int* x); 6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(unsigned int* x); 6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(long* x); 6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(unsigned long* x); 6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(long long* x); 6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg CRadix(unsigned long long* x); 6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(short* x); 6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(unsigned short* x); 6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(int* x); 6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(unsigned int* x); 6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(long* x); 6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(unsigned long* x); 6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(long long* x); 6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Hex(unsigned long long* x); 6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(short* x); 6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(unsigned 
short* x); 6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(int* x); 6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(unsigned int* x); 6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(long* x); 6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(unsigned long* x); 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(long long* x); 6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline Arg Octal(unsigned long long* x); 6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void Init(const StringPiece& pattern, const Options& options); 6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool DoMatch(const StringPiece& text, 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, 6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int* consumed, 6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Arg* const args[], 6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n) const; 6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Prog* ReverseProg() const; 6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable Mutex* mutex_; 6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string pattern_; // string regular expression 6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Options options_; // option flags 6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
string prefix_; // required prefix (before regexp_) 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool prefix_foldcase_; // prefix is ASCII case-insensitive 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Regexp* entire_regexp_; // parsed regular expression 6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed 6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2::Prog* prog_; // compiled program for regexp 7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable re2::Prog* rprog_; // reverse program for regexp 7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool is_one_pass_; // can use prog_->SearchOnePass? 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable const string* error_; // Error indicator 7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (or points to empty string) 7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable ErrorCode error_code_; // Error code 7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable string error_arg_; // Fragment of regexp showing error 7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable int num_captures_; // Number of capturing groups 7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Map from capture names to indices 7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable const map<string, int>* named_groups_; 7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Map from capture indices to names 7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) mutable const map<int, string>* group_names_; 7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) //DISALLOW_EVIL_CONSTRUCTORS(RE2); 7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2(const RE2&); 7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void operator=(const RE2&); 7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/***** Implementation details *****/ 7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Hex/Octal/Binary? 7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Special class for parsing into objects that define a ParseFrom() method 7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <class T> 7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class _RE2_MatchObject { 7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline bool Parse(const char* str, int n, void* dest) { 7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dest == NULL) return true; 7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) T* object = reinterpret_cast<T*>(dest); 7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return object->ParseFrom(str, n); 7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class RE2::Arg { 7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Empty constructor so we can declare arrays 
of RE2::Arg 7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Arg(); 7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constructor specially designed for NULL arguments 7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Arg(void*); 7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef bool (*Parser)(const char* str, int n, void* dest); 7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Type-specific parsers 7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define MAKE_PARSER(type,name) \ 7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Arg(type* p) : arg_(p), parser_(name) { } \ 7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } \ 7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(char, parse_char); 7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(signed char, parse_char); 7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(unsigned char, parse_uchar); 7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(short, parse_short); 7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(unsigned short, parse_ushort); 7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(int, parse_int); 7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(unsigned int, parse_uint); 7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(long, parse_long); 
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(unsigned long, parse_ulong); 7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(long long, parse_longlong); 7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(unsigned long long, parse_ulonglong); 7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(float, parse_float); 7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(double, parse_double); 7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(string, parse_string); 7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MAKE_PARSER(StringPiece, parse_stringpiece); 7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef MAKE_PARSER 7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Generic constructor 7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) template <class T> Arg(T*, Parser parser); 7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Generic constructor template 7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) template <class T> Arg(T* p) 7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { 7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parse the data 7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Parse(const char* str, int n) const; 7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void* arg_; 
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Parser parser_; 7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_null (const char* str, int n, void* dest); 7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_char (const char* str, int n, void* dest); 7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_uchar (const char* str, int n, void* dest); 7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_float (const char* str, int n, void* dest); 7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_double (const char* str, int n, void* dest); 7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_string (const char* str, int n, void* dest); 7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_stringpiece (const char* str, int n, void* dest); 7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DECLARE_INTEGER_PARSER(name) \ 7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: \ 7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_ ## name(const char* str, int n, void* dest); \ 7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_ ## name ## _radix( \ 7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* str, int n, void* dest, int radix); \ 7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: \ 7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ 7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ 
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) 7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(short); 8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(ushort); 8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(int); 8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(uint); 8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(long); 8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(ulong); 8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(longlong); 8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECLARE_INTEGER_PARSER(ulonglong); 8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef DECLARE_INTEGER_PARSER 8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { } 8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } 8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline bool RE2::Arg::Parse(const char* str, int n) const { 8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (*parser_)(str, n, arg_); 8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles)// This part of the parser, appropriate only for ints, deals with bases 8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define MAKE_INTEGER_PARSER(type, name) \ 8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline RE2::Arg RE2::Hex(type* ptr) { \ 8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _hex); } \ 8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline RE2::Arg RE2::Octal(type* ptr) { \ 8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _octal); } \ 8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline RE2::Arg RE2::CRadix(type* ptr) { \ 8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _cradix); } 8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(short, short); 8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(unsigned short, ushort); 8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(int, int); 8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(unsigned int, uint); 8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(long, long); 8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(unsigned long, ulong); 8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(long long, longlong); 8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MAKE_INTEGER_PARSER(unsigned long long, ulonglong); 8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef MAKE_INTEGER_PARSER 8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) 8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using re2::RE2; 8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* RE2_RE2_H */ 844