10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Random testing of regular expression matching. 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stdio.h> 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h" 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/exhaustive_tester.h" 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(regexpseed, 404, "Random regexp seed."); 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(regexpcount, 100, "How many random regexps to generate."); 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(stringseed, 200, "Random string seed."); 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(stringcount, 100, "How many random strings to generate."); 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a random test on the given parameters. 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// (Always uses the same random seeds for reproducibility. 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Can give different seeds on command line.) 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void RandomTest(int maxatoms, int maxops, 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& alphabet, 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& ops, 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int maxstrlen, const vector<string>& stralphabet, 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const string& wrapper) { 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Limit to smaller test cases in debug mode, 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // because everything is so much slower. 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (DEBUG_MODE) { 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxatoms--; 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxops--; 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxstrlen /= 2; 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ExhaustiveTester t(maxatoms, maxops, alphabet, ops, 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin maxstrlen, stralphabet, wrapper, ""); 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount); 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount); 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin printf("%d regexps, %d tests, %d failures [%d/%d str]\n", 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_EQ(0, t.failures()); 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random small regexps involving literals and egrep operators. 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, SmallEgrepLiterals) { 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(), 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 15, Explode("abc"), 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ""); 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random bigger regexps involving literals and egrep operators. 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, BigEgrepLiterals) { 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(), 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 15, Explode("abc"), 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ""); 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random small regexps involving literals, capturing parens, 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and egrep operators. 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, SmallEgrepCaptures) { 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(), 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 15, Explode("abc"), 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ""); 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random bigger regexps involving literals, capturing parens, 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and egrep operators. 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, BigEgrepCaptures) { 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(), 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 15, Explode("abc"), 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ""); 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tests random large complicated expressions, using all the possible 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// operators, some literals, some parenthesized literals, and predefined 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// character classes like \d. (Adding larger character classes would 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// make for too many possibilities.) 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Random, Complicated) { 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> ops = Split(" ", 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? " 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} " 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "%s{2} %s{2,} %s{3,4} %s{4,5}"); 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Use (?:\b) and (?:\B) instead of \b and \B, 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // because PCRE rejects \b* but accepts (?:\b)*. 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Ditto ^ and $. 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> atoms = Split(" ", 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v " 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) " 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "a (a) b c - \\\\"); 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RandomTest(10, 10, atoms, ops, 20, alphabet, ""); 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 96