exhaustive_tester.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright 2008 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Exhaustive testing of regular expression matching.
6
7// Each test picks an alphabet (e.g., "abc"), a maximum string length,
8// a maximum regular expression length, and a maximum number of letters
9// that can appear in the regular expression.  Given these parameters,
10// it tries every possible regular expression and string, verifying that
11// the NFA, DFA, and a trivial backtracking implementation agree about
12// the location of the match.
13
14#include <stdlib.h>
15#include <stdio.h>
16
17#ifndef LOGGING
18#define LOGGING 0
19#endif
20
21#include "util/test.h"
22#include "re2/testing/exhaustive_tester.h"
23#include "re2/testing/tester.h"
24
25DEFINE_bool(show_regexps, false, "show regexps during testing");
26
27DEFINE_int32(max_bad_regexp_inputs, 1,
28             "Stop testing a regular expression after finding this many "
29             "strings that break it.");
30
31// Compiled in debug mode, the usual tests run for over an hour.
32// Have to cut it down to make the unit test machines happy.
33DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
34
35namespace re2 {
36
37static char* escape(const StringPiece& sp) {
38  static char buf[512];
39  char* p = buf;
40  *p++ = '\"';
41  for (int i = 0; i < sp.size(); i++) {
42    if(p+5 >= buf+sizeof buf)
43      LOG(FATAL) << "ExhaustiveTester escape: too long";
44    if(sp[i] == '\\' || sp[i] == '\"') {
45      *p++ = '\\';
46      *p++ = sp[i];
47    } else if(sp[i] == '\n') {
48      *p++ = '\\';
49      *p++ = 'n';
50    } else {
51      *p++ = sp[i];
52    }
53  }
54  *p++ = '\"';
55  *p = '\0';
56  return buf;
57}
58
59static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
60  if (!re.Match(input, 0, input.size(), anchor, m, n)) {
61    printf("-");
62    return;
63  }
64  for (int i = 0; i < n; i++) {
65    if (i > 0)
66      printf(" ");
67    if (m[i].begin() == NULL)
68      printf("-");
69    else
70      printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
71  }
72}
73
74// Processes a single generated regexp.
75// Compiles it using Regexp interface and PCRE, and then
76// checks that NFA, DFA, and PCRE all return the same results.
77void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
78  regexps_++;
79  string regexp = const_regexp;
80  if (!topwrapper_.empty())
81    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
82
83  if (FLAGS_show_regexps) {
84    printf("\r%s", regexp.c_str());
85    fflush(stdout);
86  }
87
88  if (LOGGING) {
89    // Write out test cases and answers for use in testing
90    // other implementations, such as Go's regexp package.
91    if (randomstrings_)
92      LOG(ERROR) << "Cannot log with random strings.";
93    if (regexps_ == 1) {  // first
94      printf("strings\n");
95      strgen_.Reset();
96      while (strgen_.HasNext())
97        printf("%s\n", escape(strgen_.Next()));
98      printf("regexps\n");
99    }
100    printf("%s\n", escape(regexp));
101
102    RE2 re(regexp);
103    RE2::Options longest;
104    longest.set_longest_match(true);
105    RE2 relongest(regexp, longest);
106    int ngroup = re.NumberOfCapturingGroups()+1;
107    StringPiece* group = new StringPiece[ngroup];
108
109    strgen_.Reset();
110    while (strgen_.HasNext()) {
111      StringPiece input = strgen_.Next();
112      PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
113      printf(";");
114      PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
115      printf(";");
116      PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
117      printf(";");
118      PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
119      printf("\n");
120    }
121    delete[] group;
122    return;
123  }
124
125  Tester tester(regexp);
126  if (tester.error())
127    return;
128
129  strgen_.Reset();
130  strgen_.GenerateNULL();
131  if (randomstrings_)
132    strgen_.Random(stringseed_, stringcount_);
133  int bad_inputs = 0;
134  while (strgen_.HasNext()) {
135    tests_++;
136    if (!tester.TestInput(strgen_.Next())) {
137      failures_++;
138      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
139        break;
140    }
141  }
142}
143
144// Runs an exhaustive test on the given parameters.
145void ExhaustiveTest(int maxatoms, int maxops,
146                    const vector<string>& alphabet,
147                    const vector<string>& ops,
148                    int maxstrlen, const vector<string>& stralphabet,
149                    const string& wrapper,
150                    const string& topwrapper) {
151  if (DEBUG_MODE && FLAGS_quick_debug_mode) {
152    if (maxatoms > 1)
153      maxatoms--;
154    if (maxops > 1)
155      maxops--;
156    if (maxstrlen > 1)
157      maxstrlen--;
158  }
159  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
160                     maxstrlen, stralphabet, wrapper,
161                     topwrapper);
162  t.Generate();
163  if (!LOGGING) {
164    printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
165           t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
166  }
167  EXPECT_EQ(0, t.failures());
168}
169
170// Runs an exhaustive test using the given parameters and
171// the basic egrep operators.
172void EgrepTest(int maxatoms, int maxops, const string& alphabet,
173               int maxstrlen, const string& stralphabet,
174               const string& wrapper) {
175  const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
176
177  for (int i = 0; i < arraysize(tops); i++) {
178    ExhaustiveTest(maxatoms, maxops,
179                   Split("", alphabet),
180                   RegexpGenerator::EgrepOps(),
181                   maxstrlen,
182                   Split("", stralphabet),
183                   wrapper,
184                   tops[i]);
185  }
186}
187
188}  // namespace re2
189