10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Regular expression generator: generates all possible 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// regular expressions within parameters (see regexp_generator.h for details). 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The regexp generator first generates a sequence of commands in a simple 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// postfix language. Each command in the language is a string, 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like "a" or "%s*" or "%s|%s". 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// To evaluate a command, enough arguments are popped from the value stack to 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// plug into the %s slots. Then the result is pushed onto the stack. 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// For example, the command sequence 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a b %s%s c 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// results in the stack 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// ab c 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// GeneratePostfix generates all possible command sequences. 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Then RunPostfix turns each sequence into a regular expression 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and passes the regexp to HandleRegexp. 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string.h> 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string> 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stack> 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector> 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h" 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h" 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Returns a vector of the egrep regexp operators. 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinconst vector<string>& RegexpGenerator::EgrepOps() { 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin static const char *ops[] = { 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s%s", 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s|%s", 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s*", 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s+", 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s?", 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s\\C*", 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin }; 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin static vector<string> v(ops, ops + arraysize(ops)); 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return v; 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinRegexpGenerator::RegexpGenerator(int maxatoms, int maxops, 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& atoms, 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& ops) 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) { 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Degenerate case. 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (atoms_.size() == 0) 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxatoms_ = 0; 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ops_.size() == 0) 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxops_ = 0; 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates all possible regular expressions (within the parameters), 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// calling HandleRegexp for each one. 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::Generate() { 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> postfix; 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GeneratePostfix(&postfix, 0, 0, 0); 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates random regular expressions, calling HandleRegexp for each one. 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::GenerateRandom(int32 seed, int n) { 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ACMRandom acm(seed); 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin acm_ = &acm; 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < n; i++) { 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> postfix; 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GenerateRandomPostfix(&postfix, 0, 0, 0); 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin acm_ = NULL; 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Counts and returns the number of occurrences of "%s" in s. 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic int CountArgs(const string& s) { 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char *p = s.c_str(); 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int n = 0; 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin while ((p = strstr(p, "%s")) != NULL) { 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin p += 2; 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin n++; 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return n; 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates all possible postfix command sequences. 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Each sequence is handed off to RunPostfix to generate a regular expression. 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The arguments are: 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// post: the current postfix sequence 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// nstk: the number of elements that would be on the stack after executing 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the sequence 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// ops: the number of operators used in the sequence 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// atoms: the number of atoms used in the sequence 960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// For example, if post were ["a", "b", "%s%s", "c"], 970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then nstk = 2, ops = 1, atoms = 3. 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The initial call should be GeneratePostfix([empty vector], 0, 0, 0). 1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk, 1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int ops, int atoms) { 1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nstk == 1) 1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RunPostfix(*post); 1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Early out: if used too many operators or can't 1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // get back down to a single expression on the stack 1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // using binary operators, give up. 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ops + nstk - 1 > maxops_) 1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return; 1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Add atoms if there is room. 1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (atoms < maxatoms_) { 1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < atoms_.size(); i++) { 1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->push_back(atoms_[i]); 1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GeneratePostfix(post, nstk + 1, ops, atoms + 1); 1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->pop_back(); 1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Add operators if there are enough arguments. 1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ops < maxops_) { 1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < ops_.size(); i++) { 1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const string& fmt = ops_[i]; 1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nargs = CountArgs(fmt); 1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nargs <= nstk) { 1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->push_back(fmt); 1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms); 1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->pop_back(); 1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates a random postfix command sequence. 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Stops and returns true once a single sequence has been generated. 1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk, 1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int ops, int atoms) { 1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (;;) { 1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Stop if we get to a single element, but only sometimes. 1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) { 1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RunPostfix(*post); 1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Early out: if used too many operators or can't 1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // get back down to a single expression on the stack 1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // using binary operators, give up. 1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ops + nstk - 1 > maxops_) 1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Add operators if there are enough arguments. 1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ops < maxops_ && acm_->Uniform(2) == 0) { 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const string& fmt = ops_[acm_->Uniform(ops_.size())]; 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nargs = CountArgs(fmt); 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nargs <= nstk) { 1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->push_back(fmt); 1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool ret = GenerateRandomPostfix(post, nstk - nargs + 1, 1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ops + 1, atoms); 1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->pop_back(); 1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ret) 1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Add atoms if there is room. 1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (atoms < maxatoms_ && acm_->Uniform(2) == 0) { 1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->push_back(atoms_[acm_->Uniform(atoms_.size())]); 1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1); 1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin post->pop_back(); 1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ret) 1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Interprets the postfix command sequence to create a regular expression 1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// passed to HandleRegexp. The results of operators like %s|%s are wrapped 1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// in (?: ) to avoid needing to maintain a precedence table. 1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::RunPostfix(const vector<string>& post) { 1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin stack<string> regexps; 1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < post.size(); i++) { 1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin switch (CountArgs(post[i])) { 1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin default: 1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(FATAL) << "Bad operator: " << post[i]; 1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case 0: 1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.push(post[i]); 1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case 1: { 1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string a = regexps.top(); 1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.pop(); 1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")"); 1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case 2: { 1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string b = regexps.top(); 1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.pop(); 1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string a = regexps.top(); 1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.pop(); 2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.push("(?:" + 2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) + 2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ")"); 2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (regexps.size() != 1) { 2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Internal error - should never happen. 2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin printf("Bad regexp program:\n"); 2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < post.size(); i++) { 2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin printf(" %s\n", CEscape(post[i]).c_str()); 2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin printf("Stack after running program:\n"); 2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin while (!regexps.empty()) { 2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin printf(" %s\n", CEscape(regexps.top()).c_str()); 2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps.pop(); 2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(FATAL) << "Bad regexp program."; 2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin HandleRegexp(regexps.top()); 2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin HandleRegexp("^(?:" + regexps.top() + ")$"); 2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin HandleRegexp("^(?:" + regexps.top() + ")"); 2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin HandleRegexp("(?:" + regexps.top() + ")$"); 2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Split s into an vector of strings, one for each UTF-8 character. 2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvector<string> Explode(const StringPiece& s) { 2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> v; 2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (const char *q = s.begin(); q < s.end(); ) { 2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* p = q; 2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Rune r; 2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin q += chartorune(&r, q); 2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin v.push_back(string(p, q - p)); 2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return v; 2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Split string everywhere a substring is found, returning 2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// vector of pieces. 2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvector<string> Split(const StringPiece& sep, const StringPiece& s) { 2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> v; 2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (sep.size() == 0) 2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Explode(s); 2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char *p = s.begin(); 2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) { 2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (StringPiece(q, sep.size()) == sep) { 2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin v.push_back(string(p, q - p)); 2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin p = q + sep.size(); 2550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin q = p - 1; // -1 for ++ in loop 2560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin continue; 2570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (p < s.end()) 2600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin v.push_back(string(p, s.end() - p)); 2610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return v; 2620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 265