possible_match_test.cc revision 0d4c52358a1af421705c54bd8a9fdd8a30558a2e
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Use of this source code is governed by a BSD-style
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// license that can be found in the LICENSE file.
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner#include <vector>
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner#include "util/test.h"
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "re2/prog.h"
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "re2/re2.h"
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "re2/regexp.h"
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "re2/testing/regexp_generator.h"
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "re2/testing/string_generator.h"
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencernamespace re2 {
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Test that C++ strings are compared as uint8s, not int8s.
165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// PossibleMatchRange doesn't depend on this, but callers probably will.
175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerTEST(CplusplusStrings, EightBit) {
189c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  string s = "\x70";
19eb50ed88c2aa040fac08bf2a50bde4dd3da6eb19Chris Lattner  string t = "\xA0";
205d75de0f821023f4ed4815825bf3aea8a0b5e40dChris Lattner  EXPECT_LT(s, t);
216137dc99ef0c2b14050631367057758b0d596cb3Ted Kremenek}
221b63e4f732dbc73d90abf886b4d21f8e3a165f6dChris Lattner
23adc4eeb08042a35ae914fc557ffec0cef3df2374Chris Lattnerstruct PrefixTest {
24c7229c338c21ef26b01ef3ecf9eec4fd373fa9ecChris Lattner  const char* regexp;
255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  int maxlen;
26cc1a875f94630e58d24a55577ffbf0e89b7da8c7Chris Lattner  const char* min;
27caaa7df2c78bbd40197823034c0275f3dcbd63e7Ted Kremenek  const char* max;
28f4d5eb4866a27d497f0bb75b12c2ffd48ad4d9c0Benjamin Kramer};
290ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek
302e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregorstatic PrefixTest tests[] = {
315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "",                  10,  "",           "",        },
325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "Abcdef",            10,  "Abcdef",     "Abcdef"   },
331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "abc(def|ghi)",      10,  "abcdef",     "abcghi"   },
345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "a+hello",           10,  "aa",         "ahello"   },
3588a35862fbe473f2a4f0c19f24dbe536937e1dc6Douglas Gregor  { "a*hello",           10,  "a",          "hello"    },
365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "def|abc",           10,  "abc",        "def"      },
375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "a(b)(c)[d]",        10,  "abcd",       "abcd"     },
385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "ab(cab|cat)",       10,  "abcab",      "abcat"    },
395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "ab(cab|ca)x",       10,  "abcabx",     "abcax"    },
405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(ab|x)(c|de)",      10,  "abc",        "xde"      },
412e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregor  { "(ab|x)?(c|z)?",     10,  "",           "z"        },
425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "[^\\s\\S]",         10,  "",           ""         },
435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(abc)+",             5,  "abc",        "abcac"    },
445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(abc)+",             2,  "ab",         "ac"       },
45f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor  { "(abc)+",             1,  "a",          "b"        },
465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "[a\xC3\xA1]",        4,  "a",          "\xC3\xA1" },
4794dc8f640ebea52241412512ed48601626edbc58Douglas Gregor  { "a*",                10,  "",           "ab"       },
4894dc8f640ebea52241412512ed48601626edbc58Douglas Gregor
49e5956bd2730c051835f9acd9e957c5d79f99e7c3Chris Lattner  { "(?i)Abcdef",        10,  "ABCDEF",     "abcdef"   },
505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)abc(def|ghi)",  10,  "ABCDEF",     "abcghi"   },
515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)a+hello",       10,  "AA",         "ahello"   },
525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)a*hello",       10,  "A",          "hello"    },
535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)def|abc",       10,  "ABC",        "def"      },
545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)a(b)(c)[d]",    10,  "ABCD",       "abcd"     },
55836040f9eafe862fb1607df5c30cd3df0c22c832Chris Lattner  { "(?i)ab(cab|cat)",   10,  "ABCAB",      "abcat"    },
56ba1e898c64048e25cb65afec3807ad463e41914bArgyrios Kyrtzidis  { "(?i)ab(cab|ca)x",   10,  "ABCABX",     "abcax"    },
57444be7366d0a1e172c0290a1ea54c1cb16b5947cDaniel Dunbar  { "(?i)(ab|x)(c|de)",  10,  "ABC",        "xde"      },
585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)(ab|x)?(c|z)?", 10,  "",           "z"        },
595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)[^\\s\\S]",     10,  "",           ""         },
605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)(abc)+",         5,  "ABC",        "abcac"    },
615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)(abc)+",         2,  "AB",         "ac"       },
621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "(?i)(abc)+",         1,  "A",          "b"        },
6388a35862fbe473f2a4f0c19f24dbe536937e1dc6Douglas Gregor  { "(?i)[a\xC3\xA1]",    4,  "A",          "\xC3\xA1" },
6488a35862fbe473f2a4f0c19f24dbe536937e1dc6Douglas Gregor  { "(?i)a*",            10,  "",           "ab"       },
65a5d10c4df435964600e104ebef6a96b106e416b7Kovarththanan Rajaratnam  { "(?i)A*",            10,  "",           "ab"       },
666137dc99ef0c2b14050631367057758b0d596cb3Ted Kremenek
676137dc99ef0c2b14050631367057758b0d596cb3Ted Kremenek  { "\\AAbcdef",         10,  "Abcdef",     "Abcdef"   },
686137dc99ef0c2b14050631367057758b0d596cb3Ted Kremenek  { "\\Aabc(def|ghi)",   10,  "abcdef",     "abcghi"   },
691eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "\\Aa+hello",        10,  "aa",         "ahello"   },
700ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek  { "\\Aa*hello",        10,  "a",          "hello"    },
710ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek  { "\\Adef|abc",        10,  "abc",        "def"      },
720ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek  { "\\Aa(b)(c)[d]",     10,  "abcd",       "abcd"     },
731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "\\Aab(cab|cat)",    10,  "abcab",      "abcat"    },
745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\Aab(cab|ca)x",    10,  "abcabx",     "abcax"    },
755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A(ab|x)(c|de)",   10,  "abc",        "xde"      },
765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A[^\\s\\S]",      10,  "",           ""         },
785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A(abc)+",          5,  "abc",        "abcac"    },
795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A(abc)+",          2,  "ab",         "ac"       },
80c1f9d828c733ec1eba06d01070735d1f36fda733Chris Lattner  { "\\A(abc)+",          1,  "a",          "b"        },
815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "\\A[a\xC3\xA1]",     4,  "a",          "\xC3\xA1" },
82148772a841cae6f32db16d890e788b92a763bb3fChris Lattner  { "\\Aa*",             10,  "",           "ab"       },
83148772a841cae6f32db16d890e788b92a763bb3fChris Lattner
8492bd8c70a6837b647a6c55964f8d0a50bf561dbcJohn Thompson  { "(?i)\\AAbcdef",         10,  "ABCDEF",     "abcdef"   },
8592bd8c70a6837b647a6c55964f8d0a50bf561dbcJohn Thompson  { "(?i)\\Aabc(def|ghi)",   10,  "ABCDEF",     "abcghi"   },
861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "(?i)\\Aa+hello",        10,  "AA",         "ahello"   },
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\Aa*hello",        10,  "A",          "hello"    },
88c1f9d828c733ec1eba06d01070735d1f36fda733Chris Lattner  { "(?i)\\Adef|abc",        10,  "ABC",        "def"      },
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\Aa(b)(c)[d]",     10,  "ABCD",       "abcd"     },
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\Aab(cab|cat)",    10,  "ABCAB",      "abcat"    },
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\Aab(cab|ca)x",    10,  "ABCABX",     "abcax"    },
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A(ab|x)(c|de)",   10,  "ABC",        "xde"      },
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A(ab|x)?(c|z)?",  10,  "",           "z"        },
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A[^\\s\\S]",      10,  "",           ""         },
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A(abc)+",          5,  "ABC",        "abcac"    },
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A(abc)+",          2,  "AB",         "ac"       },
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\A(abc)+",          1,  "A",          "b"        },
981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  { "(?i)\\A[a\xC3\xA1]",     4,  "A",          "\xC3\xA1" },
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\Aa*",             10,  "",           "ab"       },
1005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  { "(?i)\\AA*",             10,  "",           "ab"       },
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer};
1025814e657c9ad9ef6049a2a4af0d2aad248a8a15cDaniel Dunbar
1035814e657c9ad9ef6049a2a4af0d2aad248a8a15cDaniel DunbarTEST(PossibleMatchRange, HandWritten) {
1045814e657c9ad9ef6049a2a4af0d2aad248a8a15cDaniel Dunbar  for (int i = 0; i < arraysize(tests); i++) {
1051d9c54df56391ac4740db27d551782e81189cb51Chris Lattner    for (int j = 0; j < 2; j++) {
1061d9c54df56391ac4740db27d551782e81189cb51Chris Lattner      const PrefixTest& t = tests[i];
1071d9c54df56391ac4740db27d551782e81189cb51Chris Lattner      string min, max;
10888a35862fbe473f2a4f0c19f24dbe536937e1dc6Douglas Gregor      if (j == 0) {
10988a35862fbe473f2a4f0c19f24dbe536937e1dc6Douglas Gregor        LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
110a5d10c4df435964600e104ebef6a96b106e416b7Kovarththanan Rajaratnam        Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
1115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        CHECK(re);
1125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        Prog* prog = re->CompileToProg(0);
113c3222091e1ffa35d0264ca6b680a88c9dc84ede2Daniel Dunbar        CHECK(prog);
1141eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
11568d331a78e655d97294e94fcfa63f92cc1f40578Steve Naroff          << " " << t.regexp;
11668d331a78e655d97294e94fcfa63f92cc1f40578Steve Naroff        delete prog;
1171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        re->Decref();
11868d331a78e655d97294e94fcfa63f92cc1f40578Steve Naroff      } else {
1191eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
12068d331a78e655d97294e94fcfa63f92cc1f40578Steve Naroff      }
121083abdf67f157e9d2ab5a8c9d5e71240479d3c99Sebastian Redl      EXPECT_EQ(t.min, min) << t.regexp;
12229238a0bf7cbf5b396efb451a0adb5fe4aa037caSteve Naroff      EXPECT_EQ(t.max, max) << t.regexp;
1232e1cd4264d363ca869bf37ef160902f211d21b8cDouglas Gregor    }
1241b63e4f732dbc73d90abf886b4d21f8e3a165f6dChris Lattner  }
1251b63e4f732dbc73d90abf886b4d21f8e3a165f6dChris Lattner}
1261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Test cases where PossibleMatchRange should return false.
1285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerTEST(PossibleMatchRange, Failures) {
1295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  string min, max;
1301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1311eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // Fails because no room to write max.
1322e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregor  EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
1332e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregor
1341eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // Fails because there is no max -- any non-empty string matches
135f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor  // or begins a match.  Have to use Latin-1 input, because there
136f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor  // are no valid UTF-8 strings beginning with byte 0xFF.
137f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor  EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
1382968442603b029949246467253eeac8139a5b6d8Douglas Gregor               PossibleMatchRange(&min, &max, 10))
1392968442603b029949246467253eeac8139a5b6d8Douglas Gregor    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1402968442603b029949246467253eeac8139a5b6d8Douglas Gregor  EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
141f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor               PossibleMatchRange(&min, &max, 10))
142f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor    << "min=" << CEscape(min) << ", max=" << CEscape(max);
143f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor  EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
144f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor               PossibleMatchRange(&min, &max, 10))
145f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor    << "min=" << CEscape(min) << ", max=" << CEscape(max);
146f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor  EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
1475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               PossibleMatchRange(&min, &max, 10))
1489c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1499c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  EXPECT_FALSE(RE2(".*", RE2::Latin1).
150caaa7df2c78bbd40197823034c0275f3dcbd63e7Ted Kremenek               PossibleMatchRange(&min, &max, 10))
1511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1529c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  EXPECT_FALSE(RE2("\\C*").
1539c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek               PossibleMatchRange(&min, &max, 10))
1549c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1559c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek
1561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // Fails because it's a malformed regexp.
1579c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
1589c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek    << "min=" << CEscape(min) << ", max=" << CEscape(max);
1599c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek}
16023f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner
1611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump// Exhaustive test: generate all regexps within parameters,
1625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// then generate all strings of a given length over a given alphabet,
1635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// then check that the prefix information agrees with whether
1645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// the regexp matches each of the strings.
1655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerclass PossibleMatchTester : public RegexpGenerator {
1665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer public:
1676cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner  PossibleMatchTester(int maxatoms,
1686cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner                      int maxops,
169caaa7df2c78bbd40197823034c0275f3dcbd63e7Ted Kremenek                      const vector<string>& alphabet,
1701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                      const vector<string>& ops,
1715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                      int maxstrlen,
1725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                      const vector<string>& stralphabet)
1736cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner    : RegexpGenerator(maxatoms, maxops, alphabet, ops),
1745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      strgen_(maxstrlen, stralphabet),
1759c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek      regexps_(0), tests_(0) { }
1769c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek
1779c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  int regexps()  { return regexps_; }
1781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  int tests()    { return tests_; }
1795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1809c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  // Needed for RegexpGenerator interface.
1819c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  void HandleRegexp(const string& regexp);
1829c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek
1839c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek private:
1849c1b750c59d510e6c9eccb1f37bccc46ccfe6844Ted Kremenek  StringGenerator strgen_;
1855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  int regexps_;   // Number of HandleRegexp calls
1871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  int tests_;     // Number of regexp tests.
1885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester);
1905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer};
1911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
192cc1a875f94630e58d24a55577ffbf0e89b7da8c7Chris Lattner// Processes a single generated regexp.
193cc1a875f94630e58d24a55577ffbf0e89b7da8c7Chris Lattner// Checks that all accepted strings agree with the prefix range.
194cc1a875f94630e58d24a55577ffbf0e89b7da8c7Chris Lattnervoid PossibleMatchTester::HandleRegexp(const string& regexp) {
1951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  regexps_++;
1960ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek
19723f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner  VLOG(3) << CEscape(regexp);
19823f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner
1990ea76727ae91bca918a8414ed85b530eddcfedebTed Kremenek  RE2 re(regexp, RE2::Latin1);
200a5d10c4df435964600e104ebef6a96b106e416b7Kovarththanan Rajaratnam  CHECK_EQ(re.error(), "");
20123f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner
20223f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner  string min, max;
20323f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner  if(!re.PossibleMatchRange(&min, &max, 10)) {
20423f77e59718385512984d4e2a021bef52b9f6ddfChris Lattner    // There's no good max for "\\C*".  Can't use strcmp
205f47724bf78299c7a50f008e0443c5f9f9f279ddcChris Lattner    // because sometimes it gets embedded in more
206f47724bf78299c7a50f008e0443c5f9f9f279ddcChris Lattner    // complicated expressions.
207f47724bf78299c7a50f008e0443c5f9f9f279ddcChris Lattner    if(strstr(regexp.c_str(), "\\C*"))
208f47724bf78299c7a50f008e0443c5f9f9f279ddcChris Lattner      return;
209f47724bf78299c7a50f008e0443c5f9f9f279ddcChris Lattner    LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
2101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  }
2115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  strgen_.Reset();
2135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (strgen_.HasNext()) {
2145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const StringPiece& s = strgen_.Next();
2155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    tests_++;
2165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (!RE2::FullMatch(s, re))
2175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      continue;
2181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;
219aa39197431a0a0b1326ecf6b3be6a11f6e2f8503Chris Lattner    CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
220aa39197431a0a0b1326ecf6b3be6a11f6e2f8503Chris Lattner  }
221aa39197431a0a0b1326ecf6b3be6a11f6e2f8503Chris Lattner}
2221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
2236cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris LattnerTEST(PossibleMatchRange, Exhaustive) {
2246cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner  int natom = 3;
2256cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner  int noperator = 3;
2266cfe7594a46b5d270142cfcb688a9c1a3a487a48Chris Lattner  int stringlen = 5;
2279e0ed0bd5a3a7bac73973980ff32132a7724e674Argyrios Kyrtzidis  if (DEBUG_MODE) {
22894dc8f640ebea52241412512ed48601626edbc58Douglas Gregor    natom = 2;
22994dc8f640ebea52241412512ed48601626edbc58Douglas Gregor    noperator = 3;
23094dc8f640ebea52241412512ed48601626edbc58Douglas Gregor    stringlen = 3;
23194dc8f640ebea52241412512ed48601626edbc58Douglas Gregor  }
23294dc8f640ebea52241412512ed48601626edbc58Douglas Gregor  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
233b9e1b75772db2c7db566c6034ba90a07f22e35ebDouglas Gregor                 RegexpGenerator::EgrepOps(),
23494dc8f640ebea52241412512ed48601626edbc58Douglas Gregor                 stringlen, Explode("ab4"));
235e671e1bc73615eda155059a772266ed2882d758cChris Lattner  t.Generate();
236f4d5eb4866a27d497f0bb75b12c2ffd48ad4d9c0Benjamin Kramer  LOG(INFO) << t.regexps() << " regexps, "
23703db1b31dd926409b7defc1c90b66549464652c0Argyrios Kyrtzidis            << t.tests() << " tests";
23803db1b31dd926409b7defc1c90b66549464652c0Argyrios Kyrtzidis}
23903db1b31dd926409b7defc1c90b66549464652c0Argyrios Kyrtzidis
24003db1b31dd926409b7defc1c90b66549464652c0Argyrios Kyrtzidis}  // namespace re2
24103db1b31dd926409b7defc1c90b66549464652c0Argyrios Kyrtzidis