10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006-2008 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector> 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h" 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h" 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/re2.h" 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h" 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h" 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/string_generator.h" 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that C++ strings are compared as uint8s, not int8s. 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// PossibleMatchRange doesn't depend on this, but callers probably will. 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(CplusplusStrings, EightBit) { 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string s = "\x70"; 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string t = "\xA0"; 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_LT(s, t); 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstruct PrefixTest { 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* regexp; 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int maxlen; 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* min; 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* max; 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic PrefixTest tests[] = { 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "", 10, "", "", }, 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "Abcdef", 10, "Abcdef", "Abcdef" }, 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "abc(def|ghi)", 10, "abcdef", "abcghi" }, 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "a+hello", 10, "aa", "ahello" }, 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "a*hello", 10, "a", "hello" }, 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "def|abc", 10, "abc", "def" }, 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "a(b)(c)[d]", 10, "abcd", "abcd" }, 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "ab(cab|cat)", 10, "abcab", "abcat" }, 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "ab(cab|ca)x", 10, "abcabx", "abcax" }, 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(ab|x)(c|de)", 10, "abc", "xde" }, 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(ab|x)?(c|z)?", 10, "", "z" }, 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "[^\\s\\S]", 10, "", "" }, 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(abc)+", 5, "abc", "abcac" }, 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(abc)+", 2, "ab", "ac" }, 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(abc)+", 1, "a", "b" }, 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "a*", 10, "", "ab" }, 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)Abcdef", 10, "ABCDEF", "abcdef" }, 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" }, 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)a+hello", 10, "AA", "ahello" }, 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)a*hello", 10, "A", "hello" }, 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)def|abc", 10, "ABC", "def" }, 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" }, 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" }, 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" }, 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" }, 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)(ab|x)?(c|z)?", 10, "", "z" }, 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)[^\\s\\S]", 10, "", "" }, 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)(abc)+", 5, "ABC", "abcac" }, 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)(abc)+", 2, "AB", "ac" }, 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)(abc)+", 1, "A", "b" }, 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)a*", 10, "", "ab" }, 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)A*", 10, "", "ab" }, 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\AAbcdef", 10, "Abcdef", "Abcdef" }, 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" }, 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aa+hello", 10, "aa", "ahello" }, 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aa*hello", 10, "a", "hello" }, 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Adef|abc", 10, "abc", "def" }, 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" }, 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aab(cab|cat)", 10, "abcab", "abcat" }, 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" }, 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(ab|x)(c|de)", 10, "abc", "xde" }, 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(ab|x)?(c|z)?", 10, "", "z" }, 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A[^\\s\\S]", 10, "", "" }, 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(abc)+", 5, "abc", "abcac" }, 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(abc)+", 2, "ab", "ac" }, 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(abc)+", 1, "a", "b" }, 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\Aa*", 10, "", "ab" }, 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" }, 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" }, 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aa+hello", 10, "AA", "ahello" }, 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aa*hello", 10, "A", "hello" }, 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Adef|abc", 10, "ABC", "def" }, 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" }, 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" }, 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" }, 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" }, 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" }, 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A[^\\s\\S]", 10, "", "" }, 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A(abc)+", 5, "ABC", "abcac" }, 960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A(abc)+", 2, "AB", "ac" }, 970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A(abc)+", 1, "A", "b" }, 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, 990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\Aa*", 10, "", "ab" }, 1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(?i)\\AA*", 10, "", "ab" }, 1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, HandWritten) { 1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < arraysize(tests); i++) { 1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < 2; j++) { 1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const PrefixTest& t = tests[i]; 1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string min, max; 1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (j == 0) { 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(INFO) << "Checking regexp=" << CEscape(t.regexp); 1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); 1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(0); 1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen)) 1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << " " << t.regexp; 1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } else { 1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen)); 1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_EQ(t.min, min) << t.regexp; 1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_EQ(t.max, max) << t.regexp; 1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test cases where PossibleMatchRange should return false. 1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, Failures) { 1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string min, max; 1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Fails because no room to write max. 1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0)); 1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Fails because there is no max -- any non-empty string matches 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // or begins a match. Have to use Latin-1 input, because there 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // are no valid UTF-8 strings beginning with byte 0xFF. 1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1). 1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1). 1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2(".+hello", RE2::Latin1). 1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2(".*hello", RE2::Latin1). 1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2(".*", RE2::Latin1). 1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2("\\C*"). 1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchRange(&min, &max, 10)) 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Fails because it's a malformed regexp. 1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10)) 1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << "min=" << CEscape(min) << ", max=" << CEscape(max); 1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Exhaustive test: generate all regexps within parameters, 1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then generate all strings of a given length over a given alphabet, 1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then check that the prefix information agrees with whether 1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the regexp matches each of the strings. 1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass PossibleMatchTester : public RegexpGenerator { 1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public: 1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchTester(int maxatoms, 1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int maxops, 1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& alphabet, 1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& ops, 1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int maxstrlen, 1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const vector<string>& stralphabet) 1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : RegexpGenerator(maxatoms, maxops, alphabet, ops), 1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin strgen_(maxstrlen, stralphabet), 1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps_(0), tests_(0) { } 1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int regexps() { return regexps_; } 1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int tests() { return tests_; } 1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Needed for RegexpGenerator interface. 1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin void HandleRegexp(const string& regexp); 1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private: 1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringGenerator strgen_; 1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int regexps_; // Number of HandleRegexp calls 1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int tests_; // Number of regexp tests. 1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester); 1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Processes a single generated regexp. 1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Checks that all accepted strings agree with the prefix range. 1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinvoid PossibleMatchTester::HandleRegexp(const string& regexp) { 1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin regexps_++; 1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin VLOG(3) << CEscape(regexp); 1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RE2 re(regexp, RE2::Latin1); 2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_EQ(re.error(), ""); 2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string min, max; 2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if(!re.PossibleMatchRange(&min, &max, 10)) { 2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // There's no good max for "\\C*". Can't use strcmp 2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // because sometimes it gets embedded in more 2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // complicated expressions. 2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if(strstr(regexp.c_str(), "\\C*")) 2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return; 2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp); 2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin strgen_.Reset(); 2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin while (strgen_.HasNext()) { 2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const StringPiece& s = strgen_.Next(); 2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin tests_++; 2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!RE2::FullMatch(s, re)) 2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin continue; 2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max; 2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min; 2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(PossibleMatchRange, Exhaustive) { 2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int natom = 3; 2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int noperator = 3; 2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int stringlen = 5; 2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (DEBUG_MODE) { 2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin natom = 2; 2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin noperator = 3; 2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin stringlen = 3; 2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"), 2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RegexpGenerator::EgrepOps(), 2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin stringlen, Explode("ab4")); 2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t.Generate(); 2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(INFO) << t.regexps() << " regexps, " 2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin << t.tests() << " tests"; 2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 241