regexp_generator.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright 2008 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Regular expression generator: generates all possible
6// regular expressions within given parameters (see below for details).
7
8#ifndef RE2_TESTING_REGEXP_GENERATOR_H__
9#define RE2_TESTING_REGEXP_GENERATOR_H__
10
11#include <string>
12#include <vector>
13#include "util/random.h"
14#include "util/util.h"
15#include "re2/stringpiece.h"
16
17namespace re2 {
18
19// Regular expression generator.
20//
21// Given a set of atom expressions like "a", "b", or "."
22// and operators like "%s*", generates all possible regular expressions
23// using at most maxbases base expressions and maxops operators.
24// For each such expression re, calls HandleRegexp(re).
25//
26// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
27//
28class RegexpGenerator {
29 public:
30  RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms,
31                  const vector<string>& ops);
32  virtual ~RegexpGenerator() {}
33
34  // Generates all the regular expressions, calling HandleRegexp(re) for each.
35  void Generate();
36
37  // Generates n random regular expressions, calling HandleRegexp(re) for each.
38  void GenerateRandom(int32 seed, int n);
39
40  // Handles a regular expression.  Must be provided by subclass.
41  virtual void HandleRegexp(const string& regexp) = 0;
42
43  // The egrep regexp operators: * + ? | and concatenation.
44  static const vector<string>& EgrepOps();
45
46 private:
47  void RunPostfix(const vector<string>& post);
48  void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits);
49  bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits);
50
51  int maxatoms_;           // Maximum number of atoms allowed in expr.
52  int maxops_;             // Maximum number of ops allowed in expr.
53  vector<string> atoms_;   // Possible atoms.
54  vector<string> ops_;     // Possible ops.
55  ACMRandom* acm_;         // Random generator.
56  DISALLOW_EVIL_CONSTRUCTORS(RegexpGenerator);
57};
58
59// Helpers for preparing arguments to RegexpGenerator constructor.
60
61// Returns one string for each character in s.
62vector<string> Explode(const StringPiece& s);
63
64// Splits string everywhere sep is found, returning
65// vector of pieces.
66vector<string> Split(const StringPiece& sep, const StringPiece& s);
67
68}  // namespace re2
69
70#endif  // RE2_TESTING_REGEXP_GENERATOR_H__
71