mimics_pcre_test.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright 2008 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "util/test.h"
6#include "re2/prog.h"
7#include "re2/regexp.h"
8
9namespace re2 {
10
// One row of the expectation table below: a pattern together with the
// answer Regexp::MimicsPCRE() must give for it.
struct PCRETest {
  // Pattern text handed to Regexp::Parse().
  const char* regexp;

  // Expected result of MimicsPCRE() on the parsed pattern.
  bool should_match;
};
15
16static PCRETest tests[] = {
17  // Most things should behave exactly.
18  { "abc",       true  },
19  { "(a|b)c",    true  },
20  { "(a*|b)c",   true  },
21  { "(a|b*)c",   true  },
22  { "a(b|c)d",   true  },
23  { "a(()|())c", true  },
24  { "ab*c",      true  },
25  { "ab+c",      true  },
26  { "a(b*|c*)d", true  },
27  { "\\W",       true  },
28  { "\\W{1,2}",  true  },
29  { "\\d",       true  },
30
31  // Check that repeated empty strings do not.
32  { "(a*)*",     false },
33  { "x(a*)*y",   false },
34  { "(a*)+",     false },
35  { "(a+)*",     true  },
36  { "(a+)+",     true  },
37  { "(a+)+",     true  },
38
39  // \v is the only character class that shouldn't.
40  { "\\b",       true  },
41  { "\\v",       false },
42  { "\\d",       true  },
43
44  // The handling of ^ in multi-line mode is different, as is
45  // the handling of $ in single-line mode.  (Both involve
46  // boundary cases if the string ends with \n.)
47  { "\\A",       true  },
48  { "\\z",       true  },
49  { "(?m)^",     false },
50  { "(?m)$",     true  },
51  { "(?-m)^",    true  },
52  { "(?-m)$",    false },  // In PCRE, == \Z
53  { "(?m)\\A",   true  },
54  { "(?m)\\z",   true  },
55  { "(?-m)\\A",  true  },
56  { "(?-m)\\z",  true  },
57};
58
59TEST(MimicsPCRE, SimpleTests) {
60  for (int i = 0; i < arraysize(tests); i++) {
61    const PCRETest& t = tests[i];
62    for (int j = 0; j < 2; j++) {
63      Regexp::ParseFlags flags = Regexp::LikePerl;
64      if (j == 0)
65        flags = flags | Regexp::Latin1;
66      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
67      CHECK(re) << " " << t.regexp;
68      CHECK_EQ(t.should_match, re->MimicsPCRE())
69        << " " << t.regexp << " "
70        << (j==0 ? "latin1" : "utf");
71      re->Decref();
72    }
73  }
74}
75
76}  // namespace re2
77