10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Random testing of regular expression matching.
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stdio.h>
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/exhaustive_tester.h"
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(regexpseed, 404, "Random regexp seed.");
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(stringseed, 200, "Random string seed.");
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(stringcount, 100, "How many random strings to generate.");
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a random test on the given parameters.
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// (Always uses the same random seeds for reproducibility.
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Can give different seeds on command line.)
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void RandomTest(int maxatoms, int maxops,
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                       const vector<string>& alphabet,
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                       const vector<string>& ops,
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                       int maxstrlen, const vector<string>& stralphabet,
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                       const string& wrapper) {
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Limit to smaller test cases in debug mode,
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // because everything is so much slower.
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (DEBUG_MODE) {
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    maxatoms--;
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    maxops--;
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    maxstrlen /= 2;
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                     maxstrlen, stralphabet, wrapper, "");
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin         t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_EQ(0, t.failures());
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random small regexps involving literals and egrep operators.
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, SmallEgrepLiterals) {
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             15, Explode("abc"),
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "");
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random bigger regexps involving literals and egrep operators.
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, BigEgrepLiterals) {
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             15, Explode("abc"),
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "");
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random small regexps involving literals, capturing parens,
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and egrep operators.
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, SmallEgrepCaptures) {
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             15, Explode("abc"),
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "");
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random bigger regexps involving literals, capturing parens,
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and egrep operators.
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, BigEgrepCaptures) {
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             15, Explode("abc"),
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "");
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random large complicated expressions, using all the possible
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// operators, some literals, some parenthesized literals, and predefined
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// character classes like \d.  (Adding larger character classes would
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// make for too many possibilities.)
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, Complicated) {
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> ops = Split(" ",
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "%s{2} %s{2,} %s{3,4} %s{4,5}");
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Use (?:\b) and (?:\B) instead of \b and \B,
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // because PCRE rejects \b* but accepts (?:\b)*.
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Ditto ^ and $.
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> atoms = Split(" ",
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    "a (a) b c - \\\\");
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RandomTest(10, 10, atoms, ops, 20, alphabet, "");
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
96