10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector>
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h"
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/re2.h"
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h"
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h"
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/string_generator.h"
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that C++ strings are compared as uint8s, not int8s.
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// PossibleMatchRange doesn't depend on this, but callers probably will.
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(CplusplusStrings, EightBit) {
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string s = "\x70";
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string t = "\xA0";
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_LT(s, t);
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstruct PrefixTest {
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char* regexp;
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int maxlen;
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char* min;
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char* max;
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic PrefixTest tests[] = {
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "",                  10,  "",           "",        },
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "Abcdef",            10,  "Abcdef",     "Abcdef"   },
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "abc(def|ghi)",      10,  "abcdef",     "abcghi"   },
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "a+hello",           10,  "aa",         "ahello"   },
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "a*hello",           10,  "a",          "hello"    },
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "def|abc",           10,  "abc",        "def"      },
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "a(b)(c)[d]",        10,  "abcd",       "abcd"     },
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "ab(cab|cat)",       10,  "abcab",      "abcat"    },
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "ab(cab|ca)x",       10,  "abcabx",     "abcax"    },
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(ab|x)(c|de)",      10,  "abc",        "xde"      },
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(ab|x)?(c|z)?",     10,  "",           "z"        },
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "[^\\s\\S]",         10,  "",           ""         },
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(abc)+",             5,  "abc",        "abcac"    },
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(abc)+",             2,  "ab",         "ac"       },
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(abc)+",             1,  "a",          "b"        },
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "[a\xC3\xA1]",        4,  "a",          "\xC3\xA1" },
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "a*",                10,  "",           "ab"       },
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)Abcdef",        10,  "ABCDEF",     "abcdef"   },
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)abc(def|ghi)",  10,  "ABCDEF",     "abcghi"   },
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)a+hello",       10,  "AA",         "ahello"   },
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)a*hello",       10,  "A",          "hello"    },
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)def|abc",       10,  "ABC",        "def"      },
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)a(b)(c)[d]",    10,  "ABCD",       "abcd"     },
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)ab(cab|cat)",   10,  "ABCAB",      "abcat"    },
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)ab(cab|ca)x",   10,  "ABCABX",     "abcax"    },
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)(ab|x)(c|de)",  10,  "ABC",        "xde"      },
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)(ab|x)?(c|z)?", 10,  "",           "z"        },
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)[^\\s\\S]",     10,  "",           ""         },
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)(abc)+",         5,  "ABC",        "abcac"    },
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)(abc)+",         2,  "AB",         "ac"       },
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)(abc)+",         1,  "A",          "b"        },
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)[a\xC3\xA1]",    4,  "A",          "\xC3\xA1" },
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)a*",            10,  "",           "ab"       },
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)A*",            10,  "",           "ab"       },
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\AAbcdef",         10,  "Abcdef",     "Abcdef"   },
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aabc(def|ghi)",   10,  "abcdef",     "abcghi"   },
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aa+hello",        10,  "aa",         "ahello"   },
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aa*hello",        10,  "a",          "hello"    },
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Adef|abc",        10,  "abc",        "def"      },
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aa(b)(c)[d]",     10,  "abcd",       "abcd"     },
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aab(cab|cat)",    10,  "abcab",      "abcat"    },
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aab(cab|ca)x",    10,  "abcabx",     "abcax"    },
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(ab|x)(c|de)",   10,  "abc",        "xde"      },
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A[^\\s\\S]",      10,  "",           ""         },
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(abc)+",          5,  "abc",        "abcac"    },
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(abc)+",          2,  "ab",         "ac"       },
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(abc)+",          1,  "a",          "b"        },
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A[a\xC3\xA1]",     4,  "a",          "\xC3\xA1" },
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\Aa*",             10,  "",           "ab"       },
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\AAbcdef",         10,  "ABCDEF",     "abcdef"   },
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aabc(def|ghi)",   10,  "ABCDEF",     "abcghi"   },
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aa+hello",        10,  "AA",         "ahello"   },
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aa*hello",        10,  "A",          "hello"    },
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Adef|abc",        10,  "ABC",        "def"      },
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aa(b)(c)[d]",     10,  "ABCD",       "abcd"     },
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aab(cab|cat)",    10,  "ABCAB",      "abcat"    },
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aab(cab|ca)x",    10,  "ABCABX",     "abcax"    },
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A(ab|x)(c|de)",   10,  "ABC",        "xde"      },
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A[^\\s\\S]",      10,  "",           ""         },
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A(abc)+",          5,  "ABC",        "abcac"    },
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A(abc)+",          2,  "AB",         "ac"       },
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A(abc)+",          1,  "A",          "b"        },
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\A[a\xC3\xA1]",     4,  "A",          "\xC3\xA1" },
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\Aa*",             10,  "",           "ab"       },
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(?i)\\AA*",             10,  "",           "ab"       },
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, HandWritten) {
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < arraysize(tests); i++) {
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < 2; j++) {
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      const PrefixTest& t = tests[i];
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      string min, max;
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (j == 0) {
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        CHECK(re);
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        Prog* prog = re->CompileToProg(0);
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        CHECK(prog);
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          << " " << t.regexp;
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        delete prog;
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        re->Decref();
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      } else {
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      EXPECT_EQ(t.min, min) << t.regexp;
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      EXPECT_EQ(t.max, max) << t.regexp;
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test cases where PossibleMatchRange should return false.
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, Failures) {
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string min, max;
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Fails because no room to write max.
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Fails because there is no max -- any non-empty string matches
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // or begins a match.  Have to use Latin-1 input, because there
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // are no valid UTF-8 strings beginning with byte 0xFF.
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2(".*", RE2::Latin1).
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2("\\C*").
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               PossibleMatchRange(&min, &max, 10))
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Fails because it's a malformed regexp.
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Exhaustive test: generate all regexps within parameters,
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then generate all strings of a given length over a given alphabet,
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then check that the prefix information agrees with whether
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the regexp matches each of the strings.
1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass PossibleMatchTester : public RegexpGenerator {
1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public:
1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  PossibleMatchTester(int maxatoms,
1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                      int maxops,
1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                      const vector<string>& alphabet,
1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                      const vector<string>& ops,
1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                      int maxstrlen,
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                      const vector<string>& stralphabet)
1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      strgen_(maxstrlen, stralphabet),
1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      regexps_(0), tests_(0) { }
1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int regexps()  { return regexps_; }
1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int tests()    { return tests_; }
1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Needed for RegexpGenerator interface.
1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  void HandleRegexp(const string& regexp);
1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private:
1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringGenerator strgen_;
1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int regexps_;   // Number of HandleRegexp calls
1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int tests_;     // Number of regexp tests.
1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester);
1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Processes a single generated regexp.
1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Checks that all accepted strings agree with the prefix range.
1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid PossibleMatchTester::HandleRegexp(const string& regexp) {
1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  regexps_++;
1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  VLOG(3) << CEscape(regexp);
1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  RE2 re(regexp, RE2::Latin1);
2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_EQ(re.error(), "");
2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string min, max;
2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if(!re.PossibleMatchRange(&min, &max, 10)) {
2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // There's no good max for "\\C*".  Can't use strcmp
2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // because sometimes it gets embedded in more
2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // complicated expressions.
2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if(strstr(regexp.c_str(), "\\C*"))
2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return;
2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  strgen_.Reset();
2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  while (strgen_.HasNext()) {
2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    const StringPiece& s = strgen_.Next();
2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    tests_++;
2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (!RE2::FullMatch(s, re))
2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      continue;
2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;
2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, Exhaustive) {
2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int natom = 3;
2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int noperator = 3;
2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int stringlen = 5;
2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (DEBUG_MODE) {
2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    natom = 2;
2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    noperator = 3;
2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    stringlen = 3;
2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                 RegexpGenerator::EgrepOps(),
2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                 stringlen, Explode("ab4"));
2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  t.Generate();
2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  LOG(INFO) << t.regexps() << " regexps, "
2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin            << t.tests() << " tests";
2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
241