15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression generator: generates all possible
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expressions within given parameters (see below for details).
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef RE2_TESTING_REGEXP_GENERATOR_H__
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define RE2_TESTING_REGEXP_GENERATOR_H__
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/random.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/stringpiece.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression generator.
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Given a set of atom expressions like "a", "b", or "."
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and operators like "%s*", generates all possible regular expressions
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// using at most maxbases base expressions and maxops operators.
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For each such expression re, calls HandleRegexp(re).
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class RegexpGenerator {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms,
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  const vector<string>& ops);
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ~RegexpGenerator() {}
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates all the regular expressions, calling HandleRegexp(re) for each.
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Generate();
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates n random regular expressions, calling HandleRegexp(re) for each.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void GenerateRandom(int32 seed, int n);
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Handles a regular expression.  Must be provided by subclass.
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void HandleRegexp(const string& regexp) = 0;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The egrep regexp operators: * + ? | and concatenation.
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const vector<string>& EgrepOps();
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RunPostfix(const vector<string>& post);
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits);
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int maxatoms_;           // Maximum number of atoms allowed in expr.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int maxops_;             // Maximum number of ops allowed in expr.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<string> atoms_;   // Possible atoms.
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<string> ops_;     // Possible ops.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ACMRandom* acm_;         // Random generator.
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(RegexpGenerator);
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Helpers for preparing arguments to RegexpGenerator constructor.
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns one string for each character in s.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)vector<string> Explode(const StringPiece& s);
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Splits string everywhere sep is found, returning
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// vector of pieces.
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)vector<string> Split(const StringPiece& sep, const StringPiece& s);
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // RE2_TESTING_REGEXP_GENERATOR_H__
71