15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/test.h"
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/re2.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/testing/regexp_generator.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/testing/string_generator.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Test that C++ strings are compared as uint8s, not int8s.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// PossibleMatchRange doesn't depend on this, but callers probably will.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(CplusplusStrings, EightBit) {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s = "\x70";
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string t = "\xA0";
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_LT(s, t);
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct PrefixTest {
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* regexp;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int maxlen;
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* min;
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* max;
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static PrefixTest tests[] = {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "",                  10,  "",           "",        },
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "Abcdef",            10,  "Abcdef",     "Abcdef"   },
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "abc(def|ghi)",      10,  "abcdef",     "abcghi"   },
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "a+hello",           10,  "aa",         "ahello"   },
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "a*hello",           10,  "a",          "hello"    },
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "def|abc",           10,  "abc",        "def"      },
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "a(b)(c)[d]",        10,  "abcd",       "abcd"     },
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "ab(cab|cat)",       10,  "abcab",      "abcat"    },
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "ab(cab|ca)x",       10,  "abcabx",     "abcax"    },
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(ab|x)(c|de)",      10,  "abc",        "xde"      },
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(ab|x)?(c|z)?",     10,  "",           "z"        },
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "[^\\s\\S]",         10,  "",           ""         },
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(abc)+",             5,  "abc",        "abcac"    },
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(abc)+",             2,  "ab",         "ac"       },
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(abc)+",             1,  "a",          "b"        },
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "[a\xC3\xA1]",        4,  "a",          "\xC3\xA1" },
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "a*",                10,  "",           "ab"       },
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)Abcdef",        10,  "ABCDEF",     "abcdef"   },
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)abc(def|ghi)",  10,  "ABCDEF",     "abcghi"   },
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)a+hello",       10,  "AA",         "ahello"   },
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)a*hello",       10,  "A",          "hello"    },
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)def|abc",       10,  "ABC",        "def"      },
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)a(b)(c)[d]",    10,  "ABCD",       "abcd"     },
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)ab(cab|cat)",   10,  "ABCAB",      "abcat"    },
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)ab(cab|ca)x",   10,  "ABCABX",     "abcax"    },
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)(ab|x)(c|de)",  10,  "ABC",        "xde"      },
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)(ab|x)?(c|z)?", 10,  "",           "z"        },
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)[^\\s\\S]",     10,  "",           ""         },
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)(abc)+",         5,  "ABC",        "abcac"    },
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)(abc)+",         2,  "AB",         "ac"       },
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)(abc)+",         1,  "A",          "b"        },
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)[a\xC3\xA1]",    4,  "A",          "\xC3\xA1" },
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)a*",            10,  "",           "ab"       },
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)A*",            10,  "",           "ab"       },
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\AAbcdef",         10,  "Abcdef",     "Abcdef"   },
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aabc(def|ghi)",   10,  "abcdef",     "abcghi"   },
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aa+hello",        10,  "aa",         "ahello"   },
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aa*hello",        10,  "a",          "hello"    },
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Adef|abc",        10,  "abc",        "def"      },
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aa(b)(c)[d]",     10,  "abcd",       "abcd"     },
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aab(cab|cat)",    10,  "abcab",      "abcat"    },
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aab(cab|ca)x",    10,  "abcabx",     "abcax"    },
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(ab|x)(c|de)",   10,  "abc",        "xde"      },
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A[^\\s\\S]",      10,  "",           ""         },
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(abc)+",          5,  "abc",        "abcac"    },
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(abc)+",          2,  "ab",         "ac"       },
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(abc)+",          1,  "a",          "b"        },
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A[a\xC3\xA1]",     4,  "a",          "\xC3\xA1" },
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\Aa*",             10,  "",           "ab"       },
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\AAbcdef",         10,  "ABCDEF",     "abcdef"   },
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aabc(def|ghi)",   10,  "ABCDEF",     "abcghi"   },
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aa+hello",        10,  "AA",         "ahello"   },
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aa*hello",        10,  "A",          "hello"    },
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Adef|abc",        10,  "ABC",        "def"      },
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aa(b)(c)[d]",     10,  "ABCD",       "abcd"     },
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aab(cab|cat)",    10,  "ABCAB",      "abcat"    },
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aab(cab|ca)x",    10,  "ABCABX",     "abcax"    },
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A(ab|x)(c|de)",   10,  "ABC",        "xde"      },
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A[^\\s\\S]",      10,  "",           ""         },
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A(abc)+",          5,  "ABC",        "abcac"    },
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A(abc)+",          2,  "AB",         "ac"       },
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A(abc)+",          1,  "A",          "b"        },
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\A[a\xC3\xA1]",     4,  "A",          "\xC3\xA1" },
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\Aa*",             10,  "",           "ab"       },
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(?i)\\AA*",             10,  "",           "ab"       },
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(PossibleMatchRange, HandWritten) {
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < arraysize(tests); i++) {
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < 2; j++) {
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const PrefixTest& t = tests[i];
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string min, max;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (j == 0) {
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        CHECK(re);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Prog* prog = re->CompileToProg(0);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        CHECK(prog);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          << " " << t.regexp;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        delete prog;
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        re->Decref();
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      EXPECT_EQ(t.min, min) << t.regexp;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      EXPECT_EQ(t.max, max) << t.regexp;
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Test cases where PossibleMatchRange should return false.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(PossibleMatchRange, Failures) {
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string min, max;
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fails because no room to write max.
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fails because there is no max -- any non-empty string matches
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // or begins a match.  Have to use Latin-1 input, because there
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // are no valid UTF-8 strings beginning with byte 0xFF.
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2(".*", RE2::Latin1).
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2("\\C*").
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               PossibleMatchRange(&min, &max, 10))
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fails because it's a malformed regexp.
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Exhaustive test: generate all regexps within parameters,
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// then generate all strings of a given length over a given alphabet,
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// then check that the prefix information agrees with whether
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the regexp matches each of the strings.
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class PossibleMatchTester : public RegexpGenerator {
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PossibleMatchTester(int maxatoms,
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      int maxops,
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const vector<string>& alphabet,
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const vector<string>& ops,
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      int maxstrlen,
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const vector<string>& stralphabet)
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      strgen_(maxstrlen, stralphabet),
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      regexps_(0), tests_(0) { }
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int regexps()  { return regexps_; }
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int tests()    { return tests_; }
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Needed for RegexpGenerator interface.
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void HandleRegexp(const string& regexp);
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringGenerator strgen_;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int regexps_;   // Number of HandleRegexp calls
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int tests_;     // Number of regexp tests.
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester);
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Processes a single generated regexp.
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Checks that all accepted strings agree with the prefix range.
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void PossibleMatchTester::HandleRegexp(const string& regexp) {
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  regexps_++;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VLOG(3) << CEscape(regexp);
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RE2 re(regexp, RE2::Latin1);
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_EQ(re.error(), "");
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string min, max;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if(!re.PossibleMatchRange(&min, &max, 10)) {
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // There's no good max for "\\C*".  Can't use strcmp
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // because sometimes it gets embedded in more
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // complicated expressions.
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(strstr(regexp.c_str(), "\\C*"))
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  strgen_.Reset();
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (strgen_.HasNext()) {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const StringPiece& s = strgen_.Next();
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tests_++;
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!RE2::FullMatch(s, re))
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(PossibleMatchRange, Exhaustive) {
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int natom = 3;
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int noperator = 3;
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int stringlen = 5;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (DEBUG_MODE) {
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    natom = 2;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    noperator = 3;
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stringlen = 3;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 RegexpGenerator::EgrepOps(),
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 stringlen, Explode("ab4"));
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  t.Generate();
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LOG(INFO) << t.regexps() << " regexps, "
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << t.tests() << " tests";
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
241