1// -*- coding: utf-8 -*-
2//
3// Copyright (c) 2005 - 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//
32// Author: Sanjay Ghemawat
33//
34// TODO: Test extractions for PartialMatch/Consume
35
36#ifdef HAVE_CONFIG_H
37#include "config.h"
38#endif
39
40#include <stdio.h>
41#include <cassert>
42#include <vector>
43#include "pcrecpp.h"
44
45using pcrecpp::StringPiece;
46using pcrecpp::RE;
47using pcrecpp::RE_Options;
48using pcrecpp::Hex;
49using pcrecpp::Octal;
50using pcrecpp::CRadix;
51
52static bool VERBOSE_TEST  = false;
53
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints the file name, line number and the stringified
// condition to stderr, then terminates the process with exit status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that
// arguments containing operators of lower precedence than `==` (for
// example `x & mask`) are compared as whole expressions.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
67
68static void Timing1(int num_iters) {
69  // Same pattern lots of times
70  RE pattern("ruby:\\d+");
71  StringPiece p("ruby:1234");
72  for (int j = num_iters; j > 0; j--) {
73    CHECK(pattern.FullMatch(p));
74  }
75}
76
77static void Timing2(int num_iters) {
78  // Same pattern lots of times
79  RE pattern("ruby:(\\d+)");
80  int i;
81  for (int j = num_iters; j > 0; j--) {
82    CHECK(pattern.FullMatch("ruby:1234", &i));
83    CHECK_EQ(i, 1234);
84  }
85}
86
87static void Timing3(int num_iters) {
88  string text_string;
89  for (int j = num_iters; j > 0; j--) {
90    text_string += "this is another line\n";
91  }
92
93  RE line_matcher(".*\n");
94  string line;
95  StringPiece text(text_string);
96  int counter = 0;
97  while (line_matcher.Consume(&text)) {
98    counter++;
99  }
100  printf("Matched %d lines\n", counter);
101}
102
#if 0  // uncomment this if you have a way of defining VirtualProcessSize()

// Constructs 100000 RE objects with distinct patterns and checks that the
// process size reported by VirtualProcessSize() grows by less than 2%
// between iteration 50000 and the end of the loop.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu format expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around
  // to a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);       // Allow < 2% growth
}

#endif
125
// Checks parsing of hexadecimal, octal and decimal text into each integer
// type, both through the explicit Hex()/Octal()/plain-pointer forms and
// through CRadix().  Each section defines a local helper macro and
// undefines it again once the section is done.
static void RadixTests() {
  printf("Testing hex\n");

  // CHECK_HEX(type, value): `value` is a hex literal written without its
  // "0x" prefix.  It is stringified to form the text that gets matched and
  // token-pasted onto `0x` to form the expected constant.
#define CHECK_HEX(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
    CHECK_EQ(v, 0x ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
    CHECK_EQ(v, 0x ## value); \
  } while(0)

  CHECK_HEX(short,              2bad);
  CHECK_HEX(unsigned short,     2badU);
  CHECK_HEX(int,                dead);
  CHECK_HEX(unsigned int,       deadU);
  CHECK_HEX(long,               7eadbeefL);
  CHECK_HEX(unsigned long,      deadbeefUL);
#ifdef HAVE_LONG_LONG
  CHECK_HEX(long long,          12345678deadbeefLL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
#endif

#undef CHECK_HEX

  printf("Testing octal\n");

  // CHECK_OCTAL(type, value): `value` is an octal literal written without
  // its leading "0".  It is stringified for the text that gets matched and
  // token-pasted onto `0` to form the expected constant.
#define CHECK_OCTAL(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
    CHECK_EQ(v, 0 ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
    CHECK_EQ(v, 0 ## value); \
  } while(0)

  CHECK_OCTAL(short,              77777);
  CHECK_OCTAL(unsigned short,     177777U);
  CHECK_OCTAL(int,                17777777777);
  CHECK_OCTAL(unsigned int,       37777777777U);
  CHECK_OCTAL(long,               17777777777L);
  CHECK_OCTAL(unsigned long,      37777777777UL);
#ifdef HAVE_LONG_LONG
  CHECK_OCTAL(long long,          777777777777777777777LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
#endif

#undef CHECK_OCTAL

  printf("Testing decimal\n");

  // CHECK_DECIMAL(type, value): `value` is used as-is for the expected
  // constant and stringified for the text that gets matched; it is parsed
  // once through a plain pointer and once through CRadix().
#define CHECK_DECIMAL(type, value) \
  do { \
    type v; \
    CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
    CHECK_EQ(v, value); \
    CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
    CHECK_EQ(v, value); \
  } while(0)

  CHECK_DECIMAL(short,              -1);
  CHECK_DECIMAL(unsigned short,     9999);
  CHECK_DECIMAL(int,                -1000);
  CHECK_DECIMAL(unsigned int,       12345U);
  CHECK_DECIMAL(long,               -10000000L);
  CHECK_DECIMAL(unsigned long,      3083324652U);
#ifdef HAVE_LONG_LONG
  CHECK_DECIMAL(long long,          -100000000000000LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
#endif

#undef CHECK_DECIMAL

}
206
207static void TestReplace() {
208  printf("Testing Replace\n");
209
210  struct ReplaceTest {
211    const char *regexp;
212    const char *rewrite;
213    const char *original;
214    const char *single;
215    const char *global;
216    int global_count;         // the expected return value from ReplaceAll
217  };
218  static const ReplaceTest tests[] = {
219    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220      "\\2\\1ay",
221      "the quick brown fox jumps over the lazy dogs.",
222      "ethay quick brown fox jumps over the lazy dogs.",
223      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224      9 },
225    { "\\w+",
226      "\\0-NOSPAM",
227      "paul.haahr@google.com",
228      "paul-NOSPAM.haahr@google.com",
229      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230      4 },
231    { "^",
232      "(START)",
233      "foo",
234      "(START)foo",
235      "(START)foo",
236      1 },
237    { "^",
238      "(START)",
239      "",
240      "(START)",
241      "(START)",
242      1 },
243    { "$",
244      "(END)",
245      "",
246      "(END)",
247      "(END)",
248      1 },
249    { "b",
250      "bb",
251      "ababababab",
252      "abbabababab",
253      "abbabbabbabbabb",
254       5 },
255    { "b",
256      "bb",
257      "bbbbbb",
258      "bbbbbbb",
259      "bbbbbbbbbbbb",
260      6 },
261    { "b+",
262      "bb",
263      "bbbbbb",
264      "bb",
265      "bb",
266      1 },
267    { "b*",
268      "bb",
269      "bbbbbb",
270      "bb",
271      "bbbb",
272      2 },
273    { "b*",
274      "bb",
275      "aaaaa",
276      "bbaaaaa",
277      "bbabbabbabbabbabb",
278      6 },
279    { "b*",
280      "bb",
281      "aa\naa\n",
282      "bbaa\naa\n",
283      "bbabbabb\nbbabbabb\nbb",
284      7 },
285    { "b*",
286      "bb",
287      "aa\raa\r",
288      "bbaa\raa\r",
289      "bbabbabb\rbbabbabb\rbb",
290      7 },
291    { "b*",
292      "bb",
293      "aa\r\naa\r\n",
294      "bbaa\r\naa\r\n",
295      "bbabbabb\r\nbbabbabb\r\nbb",
296      7 },
297    // Check empty-string matching (it's tricky!)
298    { "aa|b*",
299      "@",
300      "aa",
301      "@",
302      "@@",
303      2 },
304    { "b*|aa",
305      "@",
306      "aa",
307      "@aa",
308      "@@@",
309      3 },
310#ifdef SUPPORT_UTF8
311    { "b*",
312      "bb",
313      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
314      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
315      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
316      5 },
317    { "b*",
318      "bb",
319      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
320      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
321      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
322       "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
323      9 },
324#endif
325    { "", NULL, NULL, NULL, NULL, 0 }
326  };
327
328#ifdef SUPPORT_UTF8
329  const bool support_utf8 = true;
330#else
331  const bool support_utf8 = false;
332#endif
333
334  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
335    RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
336    assert(re.error().empty());
337    string one(t->original);
338    CHECK(re.Replace(t->rewrite, &one));
339    CHECK_EQ(one, t->single);
340    string all(t->original);
341    const int replace_count = re.GlobalReplace(t->rewrite, &all);
342    CHECK_EQ(all, t->global);
343    CHECK_EQ(replace_count, t->global_count);
344  }
345
346  // One final test: test \r\n replacement when we're not in CRLF mode
347  {
348    RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
349    assert(re.error().empty());
350    string all("aa\r\naa\r\n");
351    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
352    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
353  }
354  {
355    RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
356    assert(re.error().empty());
357    string all("aa\r\naa\r\n");
358    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
359    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
360  }
361  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
362  //       Alas, the answer depends on how pcre was compiled.
363}
364
365static void TestExtract() {
366  printf("Testing Extract\n");
367
368  string s;
369
370  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
371  CHECK_EQ(s, "kremvax!boris");
372
373  // check the RE interface as well
374  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
375  CHECK_EQ(s, "'foo'");
376  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
377  CHECK_EQ(s, "'foo'");
378}
379
380static void TestConsume() {
381  printf("Testing Consume\n");
382
383  string word;
384
385  string s("   aaa b!@#$@#$cccc");
386  StringPiece input(s);
387
388  RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
389  CHECK(r.Consume(&input, &word));
390  CHECK_EQ(word, "aaa");
391  CHECK(r.Consume(&input, &word));
392  CHECK_EQ(word, "b");
393  CHECK(! r.Consume(&input, &word));
394}
395
396static void TestFindAndConsume() {
397  printf("Testing FindAndConsume\n");
398
399  string word;
400
401  string s("   aaa b!@#$@#$cccc");
402  StringPiece input(s);
403
404  RE r("(\\w+)");      // matches a word
405  CHECK(r.FindAndConsume(&input, &word));
406  CHECK_EQ(word, "aaa");
407  CHECK(r.FindAndConsume(&input, &word));
408  CHECK_EQ(word, "b");
409  CHECK(r.FindAndConsume(&input, &word));
410  CHECK_EQ(word, "cccc");
411  CHECK(! r.FindAndConsume(&input, &word));
412}
413
414static void TestMatchNumberPeculiarity() {
415  printf("Testing match-number peculiaraity\n");
416
417  string word1;
418  string word2;
419  string word3;
420
421  RE r("(foo)|(bar)|(baz)");
422  CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
423  CHECK_EQ(word1, "foo");
424  CHECK_EQ(word2, "");
425  CHECK_EQ(word3, "");
426  CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
427  CHECK_EQ(word1, "");
428  CHECK_EQ(word2, "bar");
429  CHECK_EQ(word3, "");
430  CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
431  CHECK_EQ(word1, "");
432  CHECK_EQ(word2, "");
433  CHECK_EQ(word3, "baz");
434  CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
435
436  string a;
437  CHECK(RE("(foo)|hello").FullMatch("hello", &a));
438  CHECK_EQ(a, "");
439}
440
441static void TestRecursion() {
442  printf("Testing recursion\n");
443
444  // Get one string that passes (sometimes), one that never does.
445  string text_good("abcdefghijk");
446  string text_bad("acdefghijkl");
447
448  // According to pcretest, matching text_good against (\w+)*b
449  // requires match_limit of at least 8192, and match_recursion_limit
450  // of at least 37.
451
452  RE_Options options_ml;
453  options_ml.set_match_limit(8192);
454  RE re("(\\w+)*b", options_ml);
455  CHECK(re.PartialMatch(text_good) == true);
456  CHECK(re.PartialMatch(text_bad) == false);
457  CHECK(re.FullMatch(text_good) == false);
458  CHECK(re.FullMatch(text_bad) == false);
459
460  options_ml.set_match_limit(1024);
461  RE re2("(\\w+)*b", options_ml);
462  CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
463  CHECK(re2.PartialMatch(text_bad) == false);
464  CHECK(re2.FullMatch(text_good) == false);
465  CHECK(re2.FullMatch(text_bad) == false);
466
467  RE_Options options_mlr;
468  options_mlr.set_match_limit_recursion(50);
469  RE re3("(\\w+)*b", options_mlr);
470  CHECK(re3.PartialMatch(text_good) == true);
471  CHECK(re3.PartialMatch(text_bad) == false);
472  CHECK(re3.FullMatch(text_good) == false);
473  CHECK(re3.FullMatch(text_bad) == false);
474
475  options_mlr.set_match_limit_recursion(10);
476  RE re4("(\\w+)*b", options_mlr);
477  CHECK(re4.PartialMatch(text_good) == false);
478  CHECK(re4.PartialMatch(text_bad) == false);
479  CHECK(re4.FullMatch(text_good) == false);
480  CHECK(re4.FullMatch(text_bad) == false);
481}
482
483// A meta-quoted string, interpreted as a pattern, should always match
484// the original unquoted string.
485static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
486  string quoted = RE::QuoteMeta(unquoted);
487  RE re(quoted, options);
488  CHECK(re.FullMatch(unquoted));
489}
490
491// A string containing meaningful regexp characters, which is then meta-
492// quoted, should not generally match a string the unquoted string does.
493static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
494                                  RE_Options options = RE_Options()) {
495  string quoted = RE::QuoteMeta(unquoted);
496  RE re(quoted, options);
497  CHECK(!re.FullMatch(should_not_match));
498}
499
500// Tests that quoted meta characters match their original strings,
501// and that a few things that shouldn't match indeed do not.
502static void TestQuotaMetaSimple() {
503  TestQuoteMeta("foo");
504  TestQuoteMeta("foo.bar");
505  TestQuoteMeta("foo\\.bar");
506  TestQuoteMeta("[1-9]");
507  TestQuoteMeta("1.5-2.0?");
508  TestQuoteMeta("\\d");
509  TestQuoteMeta("Who doesn't like ice cream?");
510  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
511  TestQuoteMeta("((?!)xxx).*yyy");
512  TestQuoteMeta("([");
513  TestQuoteMeta(string("foo\0bar", 7));
514}
515
516static void TestQuoteMetaSimpleNegative() {
517  NegativeTestQuoteMeta("foo", "bar");
518  NegativeTestQuoteMeta("...", "bar");
519  NegativeTestQuoteMeta("\\.", ".");
520  NegativeTestQuoteMeta("\\.", "..");
521  NegativeTestQuoteMeta("(a)", "a");
522  NegativeTestQuoteMeta("(a|b)", "a");
523  NegativeTestQuoteMeta("(a|b)", "(a)");
524  NegativeTestQuoteMeta("(a|b)", "a|b");
525  NegativeTestQuoteMeta("[0-9]", "0");
526  NegativeTestQuoteMeta("[0-9]", "0-9");
527  NegativeTestQuoteMeta("[0-9]", "[9]");
528  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
529}
530
531static void TestQuoteMetaLatin1() {
532  TestQuoteMeta("3\xb2 = 9");
533}
534
// QuoteMeta checks on multi-byte UTF-8 text.  The whole body is compiled
// only when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  // No fancy utf8
  TestQuoteMeta("xyz", utf8_opts);
  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("\xc2\xb0", utf8_opts);
  // As a middle character
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);
  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);
  // 4-byte utf8 (music note)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);
  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");
  // 2-byte utf (degree symbol): the subject has a backslash before each
  // byte of the symbol and must not match the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8_opts);
#endif
}
549
550static void TestQuoteMetaAll() {
551  printf("Testing QuoteMeta\n");
552  TestQuotaMetaSimple();
553  TestQuoteMetaSimpleNegative();
554  TestQuoteMetaLatin1();
555  TestQuoteMetaUtf8();
556}
557
558//
559// Options tests contributed by
560// Giuseppe Maxia, CTO, Stardata s.r.l.
561// July 2005
562//
563static void GetOneOptionResult(
564                const char *option_name,
565                const char *regex,
566                const char *str,
567                RE_Options options,
568                bool full,
569                string expected) {
570
571  printf("Testing Option <%s>\n", option_name);
572  if(VERBOSE_TEST)
573    printf("/%s/ finds \"%s\" within \"%s\" \n",
574                    regex,
575                    expected.c_str(),
576                    str);
577  string captured("");
578  if (full)
579    RE(regex,options).FullMatch(str, &captured);
580  else
581    RE(regex,options).PartialMatch(str, &captured);
582  CHECK_EQ(captured, expected);
583}
584
585static void TestOneOption(
586                const char *option_name,
587                const char *regex,
588                const char *str,
589                RE_Options options,
590                bool full,
591                bool assertive = true) {
592
593  printf("Testing Option <%s>\n", option_name);
594  if (VERBOSE_TEST)
595    printf("'%s' %s /%s/ \n",
596                  str,
597                  (assertive? "matches" : "doesn't match"),
598                  regex);
599  if (assertive) {
600    if (full)
601      CHECK(RE(regex,options).FullMatch(str));
602    else
603      CHECK(RE(regex,options).PartialMatch(str));
604  } else {
605    if (full)
606      CHECK(!RE(regex,options).FullMatch(str));
607    else
608      CHECK(!RE(regex,options).PartialMatch(str));
609  }
610}
611
612static void Test_CASELESS() {
613  RE_Options options;
614  RE_Options options2;
615
616  options.set_caseless(true);
617  TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
618  TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
619  TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
620
621  TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
622  TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
623  options.set_caseless(false);
624  TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
625}
626
627static void Test_MULTILINE() {
628  RE_Options options;
629  RE_Options options2;
630  const char *str = "HELLO\n" "cruel\n" "world\n";
631
632  options.set_multiline(true);
633  TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
634  TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
635  TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
636  options.set_multiline(false);
637  TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
638}
639
640static void Test_DOTALL() {
641  RE_Options options;
642  RE_Options options2;
643  const char *str = "HELLO\n" "cruel\n" "world";
644
645  options.set_dotall(true);
646  TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
647  TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
648  TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
649  options.set_dotall(false);
650  TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
651}
652
653static void Test_DOLLAR_ENDONLY() {
654  RE_Options options;
655  RE_Options options2;
656  const char *str = "HELLO world\n";
657
658  TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
659  options.set_dollar_endonly(true);
660  TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
661  TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
662}
663
664static void Test_EXTRA() {
665  RE_Options options;
666  const char *str = "HELLO";
667
668  options.set_extra(true);
669  TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
670  TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
671  options.set_extra(false);
672  TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
673}
674
675static void Test_EXTENDED() {
676  RE_Options options;
677  RE_Options options2;
678  const char *str = "HELLO world";
679
680  options.set_extended(true);
681  TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
682  TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
683  TestOneOption("EXTENDED (class)",
684                    "^ HE L{2} O "
685                    "\\s+        "
686                    "\\w+ $      ",
687                    str,
688                    options,
689                    false);
690
691  TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
692  TestOneOption("EXTENDED (function)",
693                    "^ HE L{2} O "
694                    "\\s+        "
695                    "\\w+ $      ",
696                    str,
697                    pcrecpp::EXTENDED(),
698                    false);
699
700  options.set_extended(false);
701  TestOneOption("no EXTENDED", "HELLO world", str, options, false);
702}
703
704static void Test_NO_AUTO_CAPTURE() {
705  RE_Options options;
706  const char *str = "HELLO world";
707  string captured;
708
709  printf("Testing Option <no NO_AUTO_CAPTURE>\n");
710  if (VERBOSE_TEST)
711    printf("parentheses capture text\n");
712  RE re("(world|universe)$", options);
713  CHECK(re.Extract("\\1", str , &captured));
714  CHECK_EQ(captured, "world");
715  options.set_no_auto_capture(true);
716  printf("testing Option <NO_AUTO_CAPTURE>\n");
717  if (VERBOSE_TEST)
718    printf("parentheses do not capture text\n");
719  re.Extract("\\1",str, &captured );
720  CHECK_EQ(captured, "world");
721}
722
723static void Test_UNGREEDY() {
724  RE_Options options;
725  const char *str = "HELLO, 'this' is the 'world'";
726
727  options.set_ungreedy(true);
728  GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
729  GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
730  GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
731
732  options.set_ungreedy(false);
733  GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
734  GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
735}
736
737static void Test_all_options() {
738  const char *str = "HELLO\n" "cruel\n" "world";
739  RE_Options options;
740  options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
741
742  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
743  options.set_all_options(0);
744  TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
745  options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
746
747  TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
748  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
749                  " ^ c r u e l $ ",
750                  str,
751                  RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
752                  false);
753
754  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
755                  " ^ c r u e l $ ",
756                  str,
757                  RE_Options()
758                       .set_multiline(true)
759                       .set_extended(true),
760                  false);
761
762  options.set_all_options(0);
763  TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
764
765}
766
767static void TestOptions() {
768  printf("Testing Options\n");
769  Test_CASELESS();
770  Test_MULTILINE();
771  Test_DOTALL();
772  Test_DOLLAR_ENDONLY();
773  Test_EXTENDED();
774  Test_NO_AUTO_CAPTURE();
775  Test_UNGREEDY();
776  Test_EXTRA();
777  Test_all_options();
778}
779
780static void TestConstructors() {
781  printf("Testing constructors\n");
782
783  RE_Options options;
784  options.set_dotall(true);
785  const char *str = "HELLO\n" "cruel\n" "world";
786
787  RE orig("HELLO.*world", options);
788  CHECK(orig.FullMatch(str));
789
790  RE copy1(orig);
791  CHECK(copy1.FullMatch(str));
792
793  RE copy2("not a match");
794  CHECK(!copy2.FullMatch(str));
795  copy2 = copy1;
796  CHECK(copy2.FullMatch(str));
797  copy2 = orig;
798  CHECK(copy2.FullMatch(str));
799
800  // Make sure when we assign to ourselves, nothing bad happens
801  orig = orig;
802  copy1 = copy1;
803  copy2 = copy2;
804  CHECK(orig.FullMatch(str));
805  CHECK(copy1.FullMatch(str));
806  CHECK(copy2.FullMatch(str));
807}
808
809int main(int argc, char** argv) {
810  // Treat any flag as --help
811  if (argc > 1 && argv[1][0] == '-') {
812    printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
813           "       If 'timingX ###' is specified, run the given timing test\n"
814           "       with the given number of iterations, rather than running\n"
815           "       the default corectness test.\n", argv[0]);
816    return 0;
817  }
818
819  if (argc > 1) {
820    if ( argc == 2 || atoi(argv[2]) == 0) {
821      printf("timing mode needs a num-iters argument\n");
822      return 1;
823    }
824    if (!strcmp(argv[1], "timing1"))
825      Timing1(atoi(argv[2]));
826    else if (!strcmp(argv[1], "timing2"))
827      Timing2(atoi(argv[2]));
828    else if (!strcmp(argv[1], "timing3"))
829      Timing3(atoi(argv[2]));
830    else
831      printf("Unknown argument '%s'\n", argv[1]);
832    return 0;
833  }
834
835  printf("Testing FullMatch\n");
836
837  int i;
838  string s;
839
840  /***** FullMatch with no args *****/
841
842  CHECK(RE("h.*o").FullMatch("hello"));
843  CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
844  CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
845  CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
846  CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
847  CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
848
849  /***** FullMatch with args *****/
850
851  // Zero-arg
852  CHECK(RE("\\d+").FullMatch("1001"));
853
854  // Single-arg
855  CHECK(RE("(\\d+)").FullMatch("1001",   &i));
856  CHECK_EQ(i, 1001);
857  CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
858  CHECK_EQ(i, -123);
859  CHECK(!RE("()\\d+").FullMatch("10", &i));
860  CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
861                                &i));
862
863  // Digits surrounding integer-arg
864  CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
865  CHECK_EQ(i, 23);
866  CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
867  CHECK_EQ(i, 1);
868  CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
869  CHECK_EQ(i, -1);
870  CHECK(RE("(\\d)").PartialMatch("1234", &i));
871  CHECK_EQ(i, 1);
872  CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
873  CHECK_EQ(i, -1);
874
875  // String-arg
876  CHECK(RE("h(.*)o").FullMatch("hello", &s));
877  CHECK_EQ(s, string("ell"));
878
879  // StringPiece-arg
880  StringPiece sp;
881  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
882  CHECK_EQ(sp.size(), 4);
883  CHECK(memcmp(sp.data(), "ruby", 4) == 0);
884  CHECK_EQ(i, 1234);
885
886  // Multi-arg
887  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
888  CHECK_EQ(s, string("ruby"));
889  CHECK_EQ(i, 1234);
890
891  // Ignore non-void* NULL arg
892  CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
893  CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
894  CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
895  CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
896#ifdef HAVE_LONG_LONG
897  CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
898#endif
899  CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
900  CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
901
902  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
903  CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
904  CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
905  CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
906  CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
907  CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
908
909  // Ignored arg
910  CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
911  CHECK_EQ(s, string("ruby"));
912  CHECK_EQ(i, 1234);
913
914  // Type tests
915  {
916    char c;
917    CHECK(RE("(H)ello").FullMatch("Hello", &c));
918    CHECK_EQ(c, 'H');
919  }
920  {
921    unsigned char c;
922    CHECK(RE("(H)ello").FullMatch("Hello", &c));
923    CHECK_EQ(c, static_cast<unsigned char>('H'));
924  }
925  {
926    short v;
927    CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
928    CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
929    CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
930    CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
931    CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
932    CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
933  }
934  {
935    unsigned short v;
936    CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
937    CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
938    CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
939    CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
940  }
941  {
942    int v;
943    static const int max_value = 0x7fffffff;
944    static const int min_value = -max_value - 1;
945    CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
946    CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
947    CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
948    CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
949    CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
950    CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
951  }
952  {
953    unsigned int v;
954    static const unsigned int max_value = 0xfffffffful;
955    CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
956    CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
957    CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
958  }
959#ifdef HAVE_LONG_LONG
960# if defined(__MINGW__) || defined(__MINGW32__)
961#   define LLD "%I64d"
962#   define LLU "%I64u"
963# else
964#   define LLD "%lld"
965#   define LLU "%llu"
966# endif
967  {
968    long long v;
969    static const long long max_value = 0x7fffffffffffffffLL;
970    static const long long min_value = -max_value - 1;
971    char buf[32];  // definitely big enough for a long long
972
973    CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
974    CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
975
976    sprintf(buf, LLD, max_value);
977    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
978
979    sprintf(buf, LLD, min_value);
980    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
981
982    sprintf(buf, LLD, max_value);
983    assert(buf[strlen(buf)-1] != '9');
984    buf[strlen(buf)-1]++;
985    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
986
987    sprintf(buf, LLD, min_value);
988    assert(buf[strlen(buf)-1] != '9');
989    buf[strlen(buf)-1]++;
990    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
991  }
992#endif
993#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
994  {
995    unsigned long long v;
996    long long v2;
997    static const unsigned long long max_value = 0xffffffffffffffffULL;
998    char buf[32];  // definitely big enough for an unsigned long long
999
1000    CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1001    CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1002
1003    sprintf(buf, LLU, max_value);
1004    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1005
1006    assert(buf[strlen(buf)-1] != '9');
1007    buf[strlen(buf)-1]++;
1008    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1009  }
1010#endif
1011  {
1012    float v;
1013    CHECK(RE("(.*)").FullMatch("100", &v));
1014    CHECK(RE("(.*)").FullMatch("-100.", &v));
1015    CHECK(RE("(.*)").FullMatch("1e23", &v));
1016  }
1017  {
1018    double v;
1019    CHECK(RE("(.*)").FullMatch("100", &v));
1020    CHECK(RE("(.*)").FullMatch("-100.", &v));
1021    CHECK(RE("(.*)").FullMatch("1e23", &v));
1022  }
1023
1024  // Check that matching is fully anchored
1025  CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1026  CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1027  CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1028  CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1029
1030  // Braces
1031  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1032  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1033  CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1034
1035  // Complicated RE
1036  CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1037  CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1038  CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1039  CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1040
1041  // Check full-match handling (needs '$' tacked on internally)
1042  CHECK(RE("fo|foo").FullMatch("fo"));
1043  CHECK(RE("fo|foo").FullMatch("foo"));
1044  CHECK(RE("fo|foo$").FullMatch("fo"));
1045  CHECK(RE("fo|foo$").FullMatch("foo"));
1046  CHECK(RE("foo$").FullMatch("foo"));
1047  CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1048  CHECK(!RE("fo|bar").FullMatch("fox"));
1049
1050  // Uncomment the following if we change the handling of '$' to
1051  // prevent it from matching a trailing newline
1052  if (false) {
1053    // Check that we don't get bitten by pcre's special handling of a
1054    // '\n' at the end of the string matching '$'
1055    CHECK(!RE("foo$").PartialMatch("foo\n"));
1056  }
1057
1058  // Number of args
1059  int a[16];
1060  CHECK(RE("").FullMatch(""));
1061
1062  memset(a, 0, sizeof(0));
1063  CHECK(RE("(\\d){1}").FullMatch("1",
1064                                 &a[0]));
1065  CHECK_EQ(a[0], 1);
1066
1067  memset(a, 0, sizeof(0));
1068  CHECK(RE("(\\d)(\\d)").FullMatch("12",
1069                                   &a[0],  &a[1]));
1070  CHECK_EQ(a[0], 1);
1071  CHECK_EQ(a[1], 2);
1072
1073  memset(a, 0, sizeof(0));
1074  CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1075                                        &a[0],  &a[1],  &a[2]));
1076  CHECK_EQ(a[0], 1);
1077  CHECK_EQ(a[1], 2);
1078  CHECK_EQ(a[2], 3);
1079
1080  memset(a, 0, sizeof(0));
1081  CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1082                                             &a[0],  &a[1],  &a[2],  &a[3]));
1083  CHECK_EQ(a[0], 1);
1084  CHECK_EQ(a[1], 2);
1085  CHECK_EQ(a[2], 3);
1086  CHECK_EQ(a[3], 4);
1087
1088  memset(a, 0, sizeof(0));
1089  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1090                                                  &a[0],  &a[1],  &a[2],
1091                                                  &a[3],  &a[4]));
1092  CHECK_EQ(a[0], 1);
1093  CHECK_EQ(a[1], 2);
1094  CHECK_EQ(a[2], 3);
1095  CHECK_EQ(a[3], 4);
1096  CHECK_EQ(a[4], 5);
1097
1098  memset(a, 0, sizeof(0));
1099  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1100                                                       &a[0],  &a[1],  &a[2],
1101                                                       &a[3],  &a[4],  &a[5]));
1102  CHECK_EQ(a[0], 1);
1103  CHECK_EQ(a[1], 2);
1104  CHECK_EQ(a[2], 3);
1105  CHECK_EQ(a[3], 4);
1106  CHECK_EQ(a[4], 5);
1107  CHECK_EQ(a[5], 6);
1108
1109  memset(a, 0, sizeof(0));
1110  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1111                                                            &a[0],  &a[1],  &a[2],  &a[3],
1112                                                            &a[4],  &a[5],  &a[6]));
1113  CHECK_EQ(a[0], 1);
1114  CHECK_EQ(a[1], 2);
1115  CHECK_EQ(a[2], 3);
1116  CHECK_EQ(a[3], 4);
1117  CHECK_EQ(a[4], 5);
1118  CHECK_EQ(a[5], 6);
1119  CHECK_EQ(a[6], 7);
1120
1121  memset(a, 0, sizeof(0));
1122  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1123           "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1124               "1234567890123456",
1125               &a[0],  &a[1],  &a[2],  &a[3],
1126               &a[4],  &a[5],  &a[6],  &a[7],
1127               &a[8],  &a[9],  &a[10], &a[11],
1128               &a[12], &a[13], &a[14], &a[15]));
1129  CHECK_EQ(a[0], 1);
1130  CHECK_EQ(a[1], 2);
1131  CHECK_EQ(a[2], 3);
1132  CHECK_EQ(a[3], 4);
1133  CHECK_EQ(a[4], 5);
1134  CHECK_EQ(a[5], 6);
1135  CHECK_EQ(a[6], 7);
1136  CHECK_EQ(a[7], 8);
1137  CHECK_EQ(a[8], 9);
1138  CHECK_EQ(a[9], 0);
1139  CHECK_EQ(a[10], 1);
1140  CHECK_EQ(a[11], 2);
1141  CHECK_EQ(a[12], 3);
1142  CHECK_EQ(a[13], 4);
1143  CHECK_EQ(a[14], 5);
1144  CHECK_EQ(a[15], 6);
1145
1146  /***** PartialMatch *****/
1147
1148  printf("Testing PartialMatch\n");
1149
1150  CHECK(RE("h.*o").PartialMatch("hello"));
1151  CHECK(RE("h.*o").PartialMatch("othello"));
1152  CHECK(RE("h.*o").PartialMatch("hello!"));
1153  CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1154
1155  /***** other tests *****/
1156
1157  RadixTests();
1158  TestReplace();
1159  TestExtract();
1160  TestConsume();
1161  TestFindAndConsume();
1162  TestQuoteMetaAll();
1163  TestMatchNumberPeculiarity();
1164
1165  // Check the pattern() accessor
1166  {
1167    const string kPattern = "http://([^/]+)/.*";
1168    const RE re(kPattern);
1169    CHECK_EQ(kPattern, re.pattern());
1170  }
1171
1172  // Check RE error field.
1173  {
1174    RE re("foo");
1175    CHECK(re.error().empty());  // Must have no error
1176  }
1177
1178#ifdef SUPPORT_UTF8
1179  // Check UTF-8 handling
1180  {
1181    printf("Testing UTF-8 handling\n");
1182
1183    // Three Japanese characters (nihongo)
1184    const unsigned char utf8_string[] = {
1185         0xe6, 0x97, 0xa5, // 65e5
1186         0xe6, 0x9c, 0xac, // 672c
1187         0xe8, 0xaa, 0x9e, // 8a9e
1188         0
1189    };
1190    const unsigned char utf8_pattern[] = {
1191         '.',
1192         0xe6, 0x9c, 0xac, // 672c
1193         '.',
1194         0
1195    };
1196
1197    // Both should match in either mode, bytes or UTF-8
1198    RE re_test1(".........");
1199    CHECK(re_test1.FullMatch(utf8_string));
1200    RE re_test2("...", pcrecpp::UTF8());
1201    CHECK(re_test2.FullMatch(utf8_string));
1202
1203    // Check that '.' matches one byte or UTF-8 character
1204    // according to the mode.
1205    string ss;
1206    RE re_test3("(.)");
1207    CHECK(re_test3.PartialMatch(utf8_string, &ss));
1208    CHECK_EQ(ss, string("\xe6"));
1209    RE re_test4("(.)", pcrecpp::UTF8());
1210    CHECK(re_test4.PartialMatch(utf8_string, &ss));
1211    CHECK_EQ(ss, string("\xe6\x97\xa5"));
1212
1213    // Check that string matches itself in either mode
1214    RE re_test5(utf8_string);
1215    CHECK(re_test5.FullMatch(utf8_string));
1216    RE re_test6(utf8_string, pcrecpp::UTF8());
1217    CHECK(re_test6.FullMatch(utf8_string));
1218
1219    // Check that pattern matches string only in UTF8 mode
1220    RE re_test7(utf8_pattern);
1221    CHECK(!re_test7.FullMatch(utf8_string));
1222    RE re_test8(utf8_pattern, pcrecpp::UTF8());
1223    CHECK(re_test8.FullMatch(utf8_string));
1224  }
1225
1226  // Check that ungreedy, UTF8 regular expressions don't match when they
1227  // oughtn't -- see bug 82246.
1228  {
1229    // This code always worked.
1230    const char* pattern = "\\w+X";
1231    const string target = "a aX";
1232    RE match_sentence(pattern);
1233    RE match_sentence_re(pattern, pcrecpp::UTF8());
1234
1235    CHECK(!match_sentence.FullMatch(target));
1236    CHECK(!match_sentence_re.FullMatch(target));
1237  }
1238
1239  {
1240    const char* pattern = "(?U)\\w+X";
1241    const string target = "a aX";
1242    RE match_sentence(pattern);
1243    RE match_sentence_re(pattern, pcrecpp::UTF8());
1244
1245    CHECK(!match_sentence.FullMatch(target));
1246    CHECK(!match_sentence_re.FullMatch(target));
1247  }
1248#endif  /* def SUPPORT_UTF8 */
1249
1250  printf("Testing error reporting\n");
1251
1252  { RE re("a\\1"); CHECK(!re.error().empty()); }
1253  {
1254    RE re("a[x");
1255    CHECK(!re.error().empty());
1256  }
1257  {
1258    RE re("a[z-a]");
1259    CHECK(!re.error().empty());
1260  }
1261  {
1262    RE re("a[[:foobar:]]");
1263    CHECK(!re.error().empty());
1264  }
1265  {
1266    RE re("a(b");
1267    CHECK(!re.error().empty());
1268  }
1269  {
1270    RE re("a\\");
1271    CHECK(!re.error().empty());
1272  }
1273
1274  // Test that recursion is stopped
1275  TestRecursion();
1276
1277  // Test Options
1278  if (getenv("VERBOSE_TEST") != NULL)
1279    VERBOSE_TEST  = true;
1280  TestOptions();
1281
1282  // Test the constructors
1283  TestConstructors();
1284
1285  // Done
1286  printf("OK\n");
1287
1288  return 0;
1289}
1290