10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Regular expression generator: generates all possible
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// regular expressions within parameters (see regexp_generator.h for details).
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The regexp generator first generates a sequence of commands in a simple
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// postfix language.  Each command in the language is a string,
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like "a" or "%s*" or "%s|%s".
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// To evaluate a command, enough arguments are popped from the value stack to
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// plug into the %s slots.  Then the result is pushed onto the stack.
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// For example, the command sequence
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//      a b %s%s c
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// results in the stack
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//      ab c
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// GeneratePostfix generates all possible command sequences.
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Then RunPostfix turns each sequence into a regular expression
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and passes the regexp to HandleRegexp.
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string.h>
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string>
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stack>
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector>
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h"
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Returns a vector of the egrep regexp operators.
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinconst vector<string>& RegexpGenerator::EgrepOps() {
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  static const char *ops[] = {
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s%s",
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s|%s",
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s*",
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s+",
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s?",
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s\\C*",
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  };
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  static vector<string> v(ops, ops + arraysize(ops));
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return v;
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinRegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 const vector<string>& atoms,
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 const vector<string>& ops)
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Degenerate case.
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (atoms_.size() == 0)
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    maxatoms_ = 0;
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (ops_.size() == 0)
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    maxops_ = 0;
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates all possible regular expressions (within the parameters),
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// calling HandleRegexp for each one.
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::Generate() {
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> postfix;
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  GeneratePostfix(&postfix, 0, 0, 0);
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates random regular expressions, calling HandleRegexp for each one.
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::GenerateRandom(int32 seed, int n) {
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  ACMRandom acm(seed);
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  acm_ = &acm;
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < n; i++) {
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    vector<string> postfix;
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    GenerateRandomPostfix(&postfix, 0, 0, 0);
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  acm_ = NULL;
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Counts and returns the number of occurrences of "%s" in s.
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic int CountArgs(const string& s) {
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char *p = s.c_str();
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int n = 0;
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  while ((p = strstr(p, "%s")) != NULL) {
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    p += 2;
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    n++;
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return n;
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates all possible postfix command sequences.
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Each sequence is handed off to RunPostfix to generate a regular expression.
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The arguments are:
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   post:  the current postfix sequence
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   nstk:  the number of elements that would be on the stack after executing
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//          the sequence
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   ops:   the number of operators used in the sequence
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   atoms: the number of atoms used in the sequence
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// For example, if post were ["a", "b", "%s%s", "c"],
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then nstk = 2, ops = 1, atoms = 3.
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk,
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                      int ops, int atoms) {
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (nstk == 1)
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    RunPostfix(*post);
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Early out: if used too many operators or can't
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // get back down to a single expression on the stack
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // using binary operators, give up.
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (ops + nstk - 1 > maxops_)
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return;
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Add atoms if there is room.
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (atoms < maxatoms_) {
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int i = 0; i < atoms_.size(); i++) {
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      post->push_back(atoms_[i]);
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      GeneratePostfix(post, nstk + 1, ops, atoms + 1);
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      post->pop_back();
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Add operators if there are enough arguments.
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (ops < maxops_) {
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int i = 0; i < ops_.size(); i++) {
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      const string& fmt = ops_[i];
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      int nargs = CountArgs(fmt);
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (nargs <= nstk) {
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        post->push_back(fmt);
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms);
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        post->pop_back();
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates a random postfix command sequence.
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Stops and returns true once a single sequence has been generated.
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                            int ops, int atoms) {
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (;;) {
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Stop if we get to a single element, but only sometimes.
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) {
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      RunPostfix(*post);
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return true;
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Early out: if used too many operators or can't
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // get back down to a single expression on the stack
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // using binary operators, give up.
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (ops + nstk - 1 > maxops_)
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return false;
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Add operators if there are enough arguments.
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (ops < maxops_ && acm_->Uniform(2) == 0) {
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      const string& fmt = ops_[acm_->Uniform(ops_.size())];
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      int nargs = CountArgs(fmt);
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (nargs <= nstk) {
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        post->push_back(fmt);
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                         ops + 1, atoms);
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        post->pop_back();
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (ret)
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          return true;
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Add atoms if there is room.
1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (atoms < maxatoms_ && acm_->Uniform(2) == 0) {
1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      post->push_back(atoms_[acm_->Uniform(atoms_.size())]);
1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      post->pop_back();
1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (ret)
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return true;
1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Interprets the postfix command sequence to create a regular expression
1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// passed to HandleRegexp.  The results of operators like %s|%s are wrapped
1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// in (?: ) to avoid needing to maintain a precedence table.
1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid RegexpGenerator::RunPostfix(const vector<string>& post) {
1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  stack<string> regexps;
1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < post.size(); i++) {
1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    switch (CountArgs(post[i])) {
1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      default:
1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        LOG(FATAL) << "Bad operator: " << post[i];
1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case 0:
1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.push(post[i]);
1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case 1: {
1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        string a = regexps.top();
1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.pop();
1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case 2: {
1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        string b = regexps.top();
1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.pop();
1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        string a = regexps.top();
1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.pop();
2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        regexps.push("(?:" +
2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                     StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                     ")");
2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (regexps.size() != 1) {
2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Internal error - should never happen.
2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("Bad regexp program:\n");
2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int i = 0; i < post.size(); i++) {
2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("  %s\n", CEscape(post[i]).c_str());
2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("Stack after running program:\n");
2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    while (!regexps.empty()) {
2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("  %s\n", CEscape(regexps.top()).c_str());
2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      regexps.pop();
2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    LOG(FATAL) << "Bad regexp program.";
2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  HandleRegexp(regexps.top());
2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  HandleRegexp("^(?:" + regexps.top() + ")$");
2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  HandleRegexp("^(?:" + regexps.top() + ")");
2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  HandleRegexp("(?:" + regexps.top() + ")$");
2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Split s into an vector of strings, one for each UTF-8 character.
2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvector<string> Explode(const StringPiece& s) {
2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> v;
2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (const char *q = s.begin(); q < s.end(); ) {
2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    const char* p = q;
2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Rune r;
2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    q += chartorune(&r, q);
2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    v.push_back(string(p, q - p));
2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return v;
2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Split string everywhere a substring is found, returning
2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// vector of pieces.
2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvector<string> Split(const StringPiece& sep, const StringPiece& s) {
2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> v;
2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (sep.size() == 0)
2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return Explode(s);
2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char *p = s.begin();
2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (StringPiece(q, sep.size()) == sep) {
2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      v.push_back(string(p, q - p));
2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      p = q + sep.size();
2550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      q = p - 1;  // -1 for ++ in loop
2560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      continue;
2570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (p < s.end())
2600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    v.push_back(string(p, s.end() - p));
2610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return v;
2620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
265