10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Exhaustive testing of regular expression matching.
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Each test picks an alphabet (e.g., "abc"), a maximum string length,
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a maximum regular expression length, and a maximum number of letters
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// that can appear in the regular expression.  Given these parameters,
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// it tries every possible regular expression and string, verifying that
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the NFA, DFA, and a trivial backtracking implementation agree about
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the location of the match.
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stdlib.h>
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <stdio.h>
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#ifndef LOGGING
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#define LOGGING 0
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#endif
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/exhaustive_tester.h"
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/tester.h"
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_bool(show_regexps, false, "show regexps during testing");
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(max_bad_regexp_inputs, 1,
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "Stop testing a regular expression after finding this many "
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin             "strings that break it.");
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Compiled in debug mode, the usual tests run for over an hour.
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Have to cut it down to make the unit test machines happy.
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic char* escape(const StringPiece& sp) {
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  static char buf[512];
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  char* p = buf;
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  *p++ = '\"';
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < sp.size(); i++) {
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if(p+5 >= buf+sizeof buf)
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      LOG(FATAL) << "ExhaustiveTester escape: too long";
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if(sp[i] == '\\' || sp[i] == '\"') {
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      *p++ = '\\';
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      *p++ = sp[i];
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    } else if(sp[i] == '\n') {
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      *p++ = '\\';
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      *p++ = 'n';
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    } else {
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      *p++ = sp[i];
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  *p++ = '\"';
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  *p = '\0';
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return buf;
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (!re.Match(input, 0, input.size(), anchor, m, n)) {
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("-");
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return;
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < n; i++) {
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (i > 0)
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf(" ");
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (m[i].begin() == NULL)
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("-");
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    else
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Processes a single generated regexp.
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Compiles it using Regexp interface and PCRE, and then
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// checks that NFA, DFA, and PCRE all return the same results.
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid ExhaustiveTester::HandleRegexp(const string& const_regexp) {
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  regexps_++;
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string regexp = const_regexp;
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (!topwrapper_.empty())
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (FLAGS_show_regexps) {
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("\r%s", regexp.c_str());
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    fflush(stdout);
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (LOGGING) {
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // Write out test cases and answers for use in testing
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // other implementations, such as Go's regexp package.
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (randomstrings_)
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      LOG(ERROR) << "Cannot log with random strings.";
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (regexps_ == 1) {  // first
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("strings\n");
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      strgen_.Reset();
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      while (strgen_.HasNext())
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        printf("%s\n", escape(strgen_.Next()));
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("regexps\n");
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("%s\n", escape(regexp));
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    RE2 re(regexp);
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    RE2::Options longest;
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    longest.set_longest_match(true);
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    RE2 relongest(regexp, longest);
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    int ngroup = re.NumberOfCapturingGroups()+1;
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    StringPiece* group = new StringPiece[ngroup];
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    strgen_.Reset();
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    while (strgen_.HasNext()) {
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      StringPiece input = strgen_.Next();
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf(";");
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf(";");
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf(";");
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      printf("\n");
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete[] group;
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return;
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Tester tester(regexp);
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (tester.error())
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return;
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  strgen_.Reset();
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  strgen_.GenerateNULL();
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (randomstrings_)
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    strgen_.Random(stringseed_, stringcount_);
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int bad_inputs = 0;
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  while (strgen_.HasNext()) {
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    tests_++;
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (!tester.TestInput(strgen_.Next())) {
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      failures_++;
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs an exhaustive test on the given parameters.
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid ExhaustiveTest(int maxatoms, int maxops,
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                    const vector<string>& alphabet,
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                    const vector<string>& ops,
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                    int maxstrlen, const vector<string>& stralphabet,
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                    const string& wrapper,
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                    const string& topwrapper) {
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (DEBUG_MODE && FLAGS_quick_debug_mode) {
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (maxatoms > 1)
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      maxatoms--;
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (maxops > 1)
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      maxops--;
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (maxstrlen > 1)
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      maxstrlen--;
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                     maxstrlen, stralphabet, wrapper,
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                     topwrapper);
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  t.Generate();
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (!LOGGING) {
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin           t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_EQ(0, t.failures());
1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs an exhaustive test using the given parameters and
1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the basic egrep operators.
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid EgrepTest(int maxatoms, int maxops, const string& alphabet,
1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               int maxstrlen, const string& stralphabet,
1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               const string& wrapper) {
1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < arraysize(tops); i++) {
1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    ExhaustiveTest(maxatoms, maxops,
1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   Split("", alphabet),
1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   RegexpGenerator::EgrepOps(),
1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   maxstrlen,
1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   Split("", stralphabet),
1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   wrapper,
1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                   tops[i]);
1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
189