1// -*- coding: utf-8 -*-
2//
3// Copyright (c) 2005 - 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//
32// Author: Sanjay Ghemawat
33//
34// TODO: Test extractions for PartialMatch/Consume
35
36#ifdef HAVE_CONFIG_H
37#include "config.h"
38#endif
39
40#include <stdio.h>
41#include <string.h>      /* for memset and strcmp */
42#include <cassert>
43#include <vector>
44#include "pcrecpp.h"
45
46using pcrecpp::StringPiece;
47using pcrecpp::RE;
48using pcrecpp::RE_Options;
49using pcrecpp::Hex;
50using pcrecpp::Octal;
51using pcrecpp::CRadix;
52
// When true, the option tests print an extra line describing the pattern
// and subject they are about to check.
static bool VERBOSE_TEST = false;
54
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure, the file name, line number and the text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) is CHECK applied to (a) == (b).  Both operands are
// parenthesized so that an argument containing an operator that binds less
// tightly than == (for example `flags & mask`) is compared as a whole
// instead of being regrouped by the expansion.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
68
69static void Timing1(int num_iters) {
70  // Same pattern lots of times
71  RE pattern("ruby:\\d+");
72  StringPiece p("ruby:1234");
73  for (int j = num_iters; j > 0; j--) {
74    CHECK(pattern.FullMatch(p));
75  }
76}
77
78static void Timing2(int num_iters) {
79  // Same pattern lots of times
80  RE pattern("ruby:(\\d+)");
81  int i;
82  for (int j = num_iters; j > 0; j--) {
83    CHECK(pattern.FullMatch("ruby:1234", &i));
84    CHECK_EQ(i, 1234);
85  }
86}
87
88static void Timing3(int num_iters) {
89  string text_string;
90  for (int j = num_iters; j > 0; j--) {
91    text_string += "this is another line\n";
92  }
93
94  RE line_matcher(".*\n");
95  string line;
96  StringPiece text(text_string);
97  int counter = 0;
98  while (line_matcher.Consume(&text)) {
99    counter++;
100  }
101  printf("Matched %d lines\n", counter);
102}
103
104#if 0  // uncomment this if you have a way of defining VirtualProcessSize()
105
106static void LeakTest() {
107  // Check for memory leaks
108  unsigned long long initial_size = 0;
109  for (int i = 0; i < 100000; i++) {
110    if (i == 50000) {
111      initial_size = VirtualProcessSize();
112      printf("Size after 50000: %llu\n", initial_size);
113    }
114    char buf[100];  // definitely big enough
115    sprintf(buf, "pat%09d", i);
116    RE newre(buf);
117  }
118  uint64 final_size = VirtualProcessSize();
119  printf("Size after 100000: %llu\n", final_size);
120  const double growth = double(final_size - initial_size) / final_size;
121  printf("Growth: %0.2f%%", growth * 100);
122  CHECK(growth < 0.02);       // Allow < 2% growth
123}
124
125#endif
126
127static void RadixTests() {
128  printf("Testing hex\n");
129
130#define CHECK_HEX(type, value) \
131  do { \
132    type v; \
133    CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134    CHECK_EQ(v, 0x ## value); \
135    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136    CHECK_EQ(v, 0x ## value); \
137  } while(0)
138
139  CHECK_HEX(short,              2bad);
140  CHECK_HEX(unsigned short,     2badU);
141  CHECK_HEX(int,                dead);
142  CHECK_HEX(unsigned int,       deadU);
143  CHECK_HEX(long,               7eadbeefL);
144  CHECK_HEX(unsigned long,      deadbeefUL);
145#ifdef HAVE_LONG_LONG
146  CHECK_HEX(long long,          12345678deadbeefLL);
147#endif
148#ifdef HAVE_UNSIGNED_LONG_LONG
149  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150#endif
151
152#undef CHECK_HEX
153
154  printf("Testing octal\n");
155
156#define CHECK_OCTAL(type, value) \
157  do { \
158    type v; \
159    CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160    CHECK_EQ(v, 0 ## value); \
161    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162    CHECK_EQ(v, 0 ## value); \
163  } while(0)
164
165  CHECK_OCTAL(short,              77777);
166  CHECK_OCTAL(unsigned short,     177777U);
167  CHECK_OCTAL(int,                17777777777);
168  CHECK_OCTAL(unsigned int,       37777777777U);
169  CHECK_OCTAL(long,               17777777777L);
170  CHECK_OCTAL(unsigned long,      37777777777UL);
171#ifdef HAVE_LONG_LONG
172  CHECK_OCTAL(long long,          777777777777777777777LL);
173#endif
174#ifdef HAVE_UNSIGNED_LONG_LONG
175  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176#endif
177
178#undef CHECK_OCTAL
179
180  printf("Testing decimal\n");
181
182#define CHECK_DECIMAL(type, value) \
183  do { \
184    type v; \
185    CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186    CHECK_EQ(v, value); \
187    CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188    CHECK_EQ(v, value); \
189  } while(0)
190
191  CHECK_DECIMAL(short,              -1);
192  CHECK_DECIMAL(unsigned short,     9999);
193  CHECK_DECIMAL(int,                -1000);
194  CHECK_DECIMAL(unsigned int,       12345U);
195  CHECK_DECIMAL(long,               -10000000L);
196  CHECK_DECIMAL(unsigned long,      3083324652U);
197#ifdef HAVE_LONG_LONG
198  CHECK_DECIMAL(long long,          -100000000000000LL);
199#endif
200#ifdef HAVE_UNSIGNED_LONG_LONG
201  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202#endif
203
204#undef CHECK_DECIMAL
205
206}
207
208static void TestReplace() {
209  printf("Testing Replace\n");
210
211  struct ReplaceTest {
212    const char *regexp;
213    const char *rewrite;
214    const char *original;
215    const char *single;
216    const char *global;
217    int global_count;         // the expected return value from ReplaceAll
218  };
219  static const ReplaceTest tests[] = {
220    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221      "\\2\\1ay",
222      "the quick brown fox jumps over the lazy dogs.",
223      "ethay quick brown fox jumps over the lazy dogs.",
224      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225      9 },
226    { "\\w+",
227      "\\0-NOSPAM",
228      "paul.haahr@google.com",
229      "paul-NOSPAM.haahr@google.com",
230      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231      4 },
232    { "^",
233      "(START)",
234      "foo",
235      "(START)foo",
236      "(START)foo",
237      1 },
238    { "^",
239      "(START)",
240      "",
241      "(START)",
242      "(START)",
243      1 },
244    { "$",
245      "(END)",
246      "",
247      "(END)",
248      "(END)",
249      1 },
250    { "b",
251      "bb",
252      "ababababab",
253      "abbabababab",
254      "abbabbabbabbabb",
255       5 },
256    { "b",
257      "bb",
258      "bbbbbb",
259      "bbbbbbb",
260      "bbbbbbbbbbbb",
261      6 },
262    { "b+",
263      "bb",
264      "bbbbbb",
265      "bb",
266      "bb",
267      1 },
268    { "b*",
269      "bb",
270      "bbbbbb",
271      "bb",
272      "bbbb",
273      2 },
274    { "b*",
275      "bb",
276      "aaaaa",
277      "bbaaaaa",
278      "bbabbabbabbabbabb",
279      6 },
280    { "b*",
281      "bb",
282      "aa\naa\n",
283      "bbaa\naa\n",
284      "bbabbabb\nbbabbabb\nbb",
285      7 },
286    { "b*",
287      "bb",
288      "aa\raa\r",
289      "bbaa\raa\r",
290      "bbabbabb\rbbabbabb\rbb",
291      7 },
292    { "b*",
293      "bb",
294      "aa\r\naa\r\n",
295      "bbaa\r\naa\r\n",
296      "bbabbabb\r\nbbabbabb\r\nbb",
297      7 },
298    // Check empty-string matching (it's tricky!)
299    { "aa|b*",
300      "@",
301      "aa",
302      "@",
303      "@@",
304      2 },
305    { "b*|aa",
306      "@",
307      "aa",
308      "@aa",
309      "@@@",
310      3 },
311#ifdef SUPPORT_UTF8
312    { "b*",
313      "bb",
314      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
315      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317      5 },
318    { "b*",
319      "bb",
320      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
321      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323       "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324      9 },
325#endif
326    { "", NULL, NULL, NULL, NULL, 0 }
327  };
328
329#ifdef SUPPORT_UTF8
330  const bool support_utf8 = true;
331#else
332  const bool support_utf8 = false;
333#endif
334
335  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336    RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF)
337                                 .set_utf(support_utf8));
338    assert(re.error().empty());
339    string one(t->original);
340    CHECK(re.Replace(t->rewrite, &one));
341    CHECK_EQ(one, t->single);
342    string all(t->original);
343    const int replace_count = re.GlobalReplace(t->rewrite, &all);
344    CHECK_EQ(all, t->global);
345    CHECK_EQ(replace_count, t->global_count);
346  }
347
348  // One final test: test \r\n replacement when we're not in CRLF mode
349  {
350    RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR)
351                            .set_utf(support_utf8));
352    assert(re.error().empty());
353    string all("aa\r\naa\r\n");
354    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
355    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
356  }
357  {
358    RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF)
359                            .set_utf(support_utf8));
360    assert(re.error().empty());
361    string all("aa\r\naa\r\n");
362    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
363    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
364  }
365  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
366  //       Alas, the answer depends on how pcre was compiled.
367}
368
369static void TestExtract() {
370  printf("Testing Extract\n");
371
372  string s;
373
374  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
375  CHECK_EQ(s, "kremvax!boris");
376
377  // check the RE interface as well
378  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
379  CHECK_EQ(s, "'foo'");
380  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
381  CHECK_EQ(s, "'foo'");
382}
383
384static void TestConsume() {
385  printf("Testing Consume\n");
386
387  string word;
388
389  string s("   aaa b!@#$@#$cccc");
390  StringPiece input(s);
391
392  RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
393  CHECK(r.Consume(&input, &word));
394  CHECK_EQ(word, "aaa");
395  CHECK(r.Consume(&input, &word));
396  CHECK_EQ(word, "b");
397  CHECK(! r.Consume(&input, &word));
398}
399
400static void TestFindAndConsume() {
401  printf("Testing FindAndConsume\n");
402
403  string word;
404
405  string s("   aaa b!@#$@#$cccc");
406  StringPiece input(s);
407
408  RE r("(\\w+)");      // matches a word
409  CHECK(r.FindAndConsume(&input, &word));
410  CHECK_EQ(word, "aaa");
411  CHECK(r.FindAndConsume(&input, &word));
412  CHECK_EQ(word, "b");
413  CHECK(r.FindAndConsume(&input, &word));
414  CHECK_EQ(word, "cccc");
415  CHECK(! r.FindAndConsume(&input, &word));
416}
417
418static void TestMatchNumberPeculiarity() {
419  printf("Testing match-number peculiarity\n");
420
421  string word1;
422  string word2;
423  string word3;
424
425  RE r("(foo)|(bar)|(baz)");
426  CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
427  CHECK_EQ(word1, "foo");
428  CHECK_EQ(word2, "");
429  CHECK_EQ(word3, "");
430  CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
431  CHECK_EQ(word1, "");
432  CHECK_EQ(word2, "bar");
433  CHECK_EQ(word3, "");
434  CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
435  CHECK_EQ(word1, "");
436  CHECK_EQ(word2, "");
437  CHECK_EQ(word3, "baz");
438  CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
439
440  string a;
441  CHECK(RE("(foo)|hello").FullMatch("hello", &a));
442  CHECK_EQ(a, "");
443}
444
445static void TestRecursion() {
446  printf("Testing recursion\n");
447
448  // Get one string that passes (sometimes), one that never does.
449  string text_good("abcdefghijk");
450  string text_bad("acdefghijkl");
451
452  // According to pcretest, matching text_good against (\w+)*b
453  // requires match_limit of at least 8192, and match_recursion_limit
454  // of at least 37.
455
456  RE_Options options_ml;
457  options_ml.set_match_limit(8192);
458  RE re("(\\w+)*b", options_ml);
459  CHECK(re.PartialMatch(text_good) == true);
460  CHECK(re.PartialMatch(text_bad) == false);
461  CHECK(re.FullMatch(text_good) == false);
462  CHECK(re.FullMatch(text_bad) == false);
463
464  options_ml.set_match_limit(1024);
465  RE re2("(\\w+)*b", options_ml);
466  CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
467  CHECK(re2.PartialMatch(text_bad) == false);
468  CHECK(re2.FullMatch(text_good) == false);
469  CHECK(re2.FullMatch(text_bad) == false);
470
471  RE_Options options_mlr;
472  options_mlr.set_match_limit_recursion(50);
473  RE re3("(\\w+)*b", options_mlr);
474  CHECK(re3.PartialMatch(text_good) == true);
475  CHECK(re3.PartialMatch(text_bad) == false);
476  CHECK(re3.FullMatch(text_good) == false);
477  CHECK(re3.FullMatch(text_bad) == false);
478
479  options_mlr.set_match_limit_recursion(10);
480  RE re4("(\\w+)*b", options_mlr);
481  CHECK(re4.PartialMatch(text_good) == false);
482  CHECK(re4.PartialMatch(text_bad) == false);
483  CHECK(re4.FullMatch(text_good) == false);
484  CHECK(re4.FullMatch(text_bad) == false);
485}
486
487// A meta-quoted string, interpreted as a pattern, should always match
488// the original unquoted string.
489static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
490  string quoted = RE::QuoteMeta(unquoted);
491  RE re(quoted, options);
492  CHECK(re.FullMatch(unquoted));
493}
494
495// A string containing meaningful regexp characters, which is then meta-
496// quoted, should not generally match a string the unquoted string does.
497static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
498                                  RE_Options options = RE_Options()) {
499  string quoted = RE::QuoteMeta(unquoted);
500  RE re(quoted, options);
501  CHECK(!re.FullMatch(should_not_match));
502}
503
504// Tests that quoted meta characters match their original strings,
505// and that a few things that shouldn't match indeed do not.
506static void TestQuotaMetaSimple() {
507  TestQuoteMeta("foo");
508  TestQuoteMeta("foo.bar");
509  TestQuoteMeta("foo\\.bar");
510  TestQuoteMeta("[1-9]");
511  TestQuoteMeta("1.5-2.0?");
512  TestQuoteMeta("\\d");
513  TestQuoteMeta("Who doesn't like ice cream?");
514  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
515  TestQuoteMeta("((?!)xxx).*yyy");
516  TestQuoteMeta("([");
517  TestQuoteMeta(string("foo\0bar", 7));
518}
519
520static void TestQuoteMetaSimpleNegative() {
521  NegativeTestQuoteMeta("foo", "bar");
522  NegativeTestQuoteMeta("...", "bar");
523  NegativeTestQuoteMeta("\\.", ".");
524  NegativeTestQuoteMeta("\\.", "..");
525  NegativeTestQuoteMeta("(a)", "a");
526  NegativeTestQuoteMeta("(a|b)", "a");
527  NegativeTestQuoteMeta("(a|b)", "(a)");
528  NegativeTestQuoteMeta("(a|b)", "a|b");
529  NegativeTestQuoteMeta("[0-9]", "0");
530  NegativeTestQuoteMeta("[0-9]", "0-9");
531  NegativeTestQuoteMeta("[0-9]", "[9]");
532  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
533}
534
535static void TestQuoteMetaLatin1() {
536  TestQuoteMeta("3\xb2 = 9");
537}
538
// QuoteMeta checks on multi-byte UTF-8 sequences, compiled with the UTF8
// option.  The body is compiled only when SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);    // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the subject has a backslash before each
  // byte of the sequence and must not match the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8);
#endif
}
553
554static void TestQuoteMetaAll() {
555  printf("Testing QuoteMeta\n");
556  TestQuotaMetaSimple();
557  TestQuoteMetaSimpleNegative();
558  TestQuoteMetaLatin1();
559  TestQuoteMetaUtf8();
560}
561
562//
563// Options tests contributed by
564// Giuseppe Maxia, CTO, Stardata s.r.l.
565// July 2005
566//
567static void GetOneOptionResult(
568                const char *option_name,
569                const char *regex,
570                const char *str,
571                RE_Options options,
572                bool full,
573                string expected) {
574
575  printf("Testing Option <%s>\n", option_name);
576  if(VERBOSE_TEST)
577    printf("/%s/ finds \"%s\" within \"%s\" \n",
578                    regex,
579                    expected.c_str(),
580                    str);
581  string captured("");
582  if (full)
583    RE(regex,options).FullMatch(str, &captured);
584  else
585    RE(regex,options).PartialMatch(str, &captured);
586  CHECK_EQ(captured, expected);
587}
588
589static void TestOneOption(
590                const char *option_name,
591                const char *regex,
592                const char *str,
593                RE_Options options,
594                bool full,
595                bool assertive = true) {
596
597  printf("Testing Option <%s>\n", option_name);
598  if (VERBOSE_TEST)
599    printf("'%s' %s /%s/ \n",
600                  str,
601                  (assertive? "matches" : "doesn't match"),
602                  regex);
603  if (assertive) {
604    if (full)
605      CHECK(RE(regex,options).FullMatch(str));
606    else
607      CHECK(RE(regex,options).PartialMatch(str));
608  } else {
609    if (full)
610      CHECK(!RE(regex,options).FullMatch(str));
611    else
612      CHECK(!RE(regex,options).PartialMatch(str));
613  }
614}
615
616static void Test_CASELESS() {
617  RE_Options options;
618  RE_Options options2;
619
620  options.set_caseless(true);
621  TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
622  TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
623  TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
624
625  TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
626  TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
627  options.set_caseless(false);
628  TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
629}
630
631static void Test_MULTILINE() {
632  RE_Options options;
633  RE_Options options2;
634  const char *str = "HELLO\n" "cruel\n" "world\n";
635
636  options.set_multiline(true);
637  TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
638  TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
639  TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
640  options.set_multiline(false);
641  TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
642}
643
644static void Test_DOTALL() {
645  RE_Options options;
646  RE_Options options2;
647  const char *str = "HELLO\n" "cruel\n" "world";
648
649  options.set_dotall(true);
650  TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
651  TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
652  TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
653  options.set_dotall(false);
654  TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
655}
656
657static void Test_DOLLAR_ENDONLY() {
658  RE_Options options;
659  RE_Options options2;
660  const char *str = "HELLO world\n";
661
662  TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
663  options.set_dollar_endonly(true);
664  TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
665  TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
666}
667
668static void Test_EXTENDED() {
669  RE_Options options;
670  RE_Options options2;
671  const char *str = "HELLO world";
672
673  options.set_extended(true);
674  TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
675  TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
676  TestOneOption("EXTENDED (class)",
677                    "^ HE L{2} O "
678                    "\\s+        "
679                    "\\w+ $      ",
680                    str,
681                    options,
682                    false);
683
684  TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
685  TestOneOption("EXTENDED (function)",
686                    "^ HE L{2} O "
687                    "\\s+        "
688                    "\\w+ $      ",
689                    str,
690                    pcrecpp::EXTENDED(),
691                    false);
692
693  options.set_extended(false);
694  TestOneOption("no EXTENDED", "HELLO world", str, options, false);
695}
696
697static void Test_NO_AUTO_CAPTURE() {
698  RE_Options options;
699  const char *str = "HELLO world";
700  string captured;
701
702  printf("Testing Option <no NO_AUTO_CAPTURE>\n");
703  if (VERBOSE_TEST)
704    printf("parentheses capture text\n");
705  RE re("(world|universe)$", options);
706  CHECK(re.Extract("\\1", str , &captured));
707  CHECK_EQ(captured, "world");
708  options.set_no_auto_capture(true);
709  printf("testing Option <NO_AUTO_CAPTURE>\n");
710  if (VERBOSE_TEST)
711    printf("parentheses do not capture text\n");
712  re.Extract("\\1",str, &captured );
713  CHECK_EQ(captured, "world");
714}
715
716static void Test_UNGREEDY() {
717  RE_Options options;
718  const char *str = "HELLO, 'this' is the 'world'";
719
720  options.set_ungreedy(true);
721  GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
722  GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
723  GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
724
725  options.set_ungreedy(false);
726  GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
727  GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
728}
729
730static void Test_all_options() {
731  const char *str = "HELLO\n" "cruel\n" "world";
732  RE_Options options;
733  options.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL);
734
735  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
736  options.set_all_options(0);
737  TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
738  options.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED);
739
740  TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
741  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
742                  " ^ c r u e l $ ",
743                  str,
744                  RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED),
745                  false);
746
747  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
748                  " ^ c r u e l $ ",
749                  str,
750                  RE_Options()
751                       .set_multiline(true)
752                       .set_extended(true),
753                  false);
754
755  options.set_all_options(0);
756  TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
757
758}
759
760static void TestOptions() {
761  printf("Testing Options\n");
762  Test_CASELESS();
763  Test_MULTILINE();
764  Test_DOTALL();
765  Test_DOLLAR_ENDONLY();
766  Test_EXTENDED();
767  Test_NO_AUTO_CAPTURE();
768  Test_UNGREEDY();
769  Test_all_options();
770}
771
772static void TestConstructors() {
773  printf("Testing constructors\n");
774
775  RE_Options options;
776  options.set_dotall(true);
777  const char *str = "HELLO\n" "cruel\n" "world";
778
779  RE orig("HELLO.*world", options);
780  CHECK(orig.FullMatch(str));
781
782  RE copy1(orig);
783  CHECK(copy1.FullMatch(str));
784
785  RE copy2("not a match");
786  CHECK(!copy2.FullMatch(str));
787  copy2 = copy1;
788  CHECK(copy2.FullMatch(str));
789  copy2 = orig;
790  CHECK(copy2.FullMatch(str));
791
792  // Make sure when we assign to ourselves, nothing bad happens
793  orig = orig;
794  copy1 = copy1;
795  copy2 = copy2;
796  CHECK(orig.FullMatch(str));
797  CHECK(copy1.FullMatch(str));
798  CHECK(copy2.FullMatch(str));
799}
800
801int main(int argc, char** argv) {
802  // Treat any flag as --help
803  if (argc > 1 && argv[1][0] == '-') {
804    printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
805           "       If 'timingX ###' is specified, run the given timing test\n"
806           "       with the given number of iterations, rather than running\n"
807           "       the default corectness test.\n", argv[0]);
808    return 0;
809  }
810
811  if (argc > 1) {
812    if ( argc == 2 || atoi(argv[2]) == 0) {
813      printf("timing mode needs a num-iters argument\n");
814      return 1;
815    }
816    if (!strcmp(argv[1], "timing1"))
817      Timing1(atoi(argv[2]));
818    else if (!strcmp(argv[1], "timing2"))
819      Timing2(atoi(argv[2]));
820    else if (!strcmp(argv[1], "timing3"))
821      Timing3(atoi(argv[2]));
822    else
823      printf("Unknown argument '%s'\n", argv[1]);
824    return 0;
825  }
826
827  printf("PCRE C++ wrapper tests\n");
828  printf("Testing FullMatch\n");
829
830  int i;
831  string s;
832
833  /***** FullMatch with no args *****/
834
835  CHECK(RE("h.*o").FullMatch("hello"));
836  CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
837  CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
838  CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
839  CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
840  CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
841
842  /***** FullMatch with args *****/
843
844  // Zero-arg
845  CHECK(RE("\\d+").FullMatch("1001"));
846
847  // Single-arg
848  CHECK(RE("(\\d+)").FullMatch("1001",   &i));
849  CHECK_EQ(i, 1001);
850  CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
851  CHECK_EQ(i, -123);
852  CHECK(!RE("()\\d+").FullMatch("10", &i));
853  CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
854                                &i));
855
856  // Digits surrounding integer-arg
857  CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
858  CHECK_EQ(i, 23);
859  CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
860  CHECK_EQ(i, 1);
861  CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
862  CHECK_EQ(i, -1);
863  CHECK(RE("(\\d)").PartialMatch("1234", &i));
864  CHECK_EQ(i, 1);
865  CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
866  CHECK_EQ(i, -1);
867
868  // String-arg
869  CHECK(RE("h(.*)o").FullMatch("hello", &s));
870  CHECK_EQ(s, string("ell"));
871
872  // StringPiece-arg
873  StringPiece sp;
874  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
875  CHECK_EQ(sp.size(), 4);
876  CHECK(memcmp(sp.data(), "ruby", 4) == 0);
877  CHECK_EQ(i, 1234);
878
879  // Multi-arg
880  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
881  CHECK_EQ(s, string("ruby"));
882  CHECK_EQ(i, 1234);
883
884  // Ignore non-void* NULL arg
885  CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
886  CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
887  CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
888  CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
889#ifdef HAVE_LONG_LONG
890  CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
891#endif
892  CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
893  CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
894
895  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
896  CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
897  CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
898  CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
899  CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
900  CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
901
902  // Ignored arg
903  CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
904  CHECK_EQ(s, string("ruby"));
905  CHECK_EQ(i, 1234);
906
907  // Type tests
908  {
909    char c;
910    CHECK(RE("(H)ello").FullMatch("Hello", &c));
911    CHECK_EQ(c, 'H');
912  }
913  {
914    unsigned char c;
915    CHECK(RE("(H)ello").FullMatch("Hello", &c));
916    CHECK_EQ(c, static_cast<unsigned char>('H'));
917  }
918  {
919    short v;
920    CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
921    CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
922    CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
923    CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
924    CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
925    CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
926  }
927  {
928    unsigned short v;
929    CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
930    CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
931    CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
932    CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
933  }
934  {
935    int v;
936    static const int max_value = 0x7fffffff;
937    static const int min_value = -max_value - 1;
938    CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
939    CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
940    CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
941    CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
942    CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
943    CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
944  }
945  {
946    unsigned int v;
947    static const unsigned int max_value = 0xfffffffful;
948    CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
949    CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
950    CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
951  }
952#ifdef HAVE_LONG_LONG
953# if defined(__MINGW__) || defined(__MINGW32__)
954#   define LLD "%I64d"
955#   define LLU "%I64u"
956# else
957#   define LLD "%lld"
958#   define LLU "%llu"
959# endif
960  {
961    long long v;
962    static const long long max_value = 0x7fffffffffffffffLL;
963    static const long long min_value = -max_value - 1;
964    char buf[32];  // definitely big enough for a long long
965
966    CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
967    CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
968
969    sprintf(buf, LLD, max_value);
970    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
971
972    sprintf(buf, LLD, min_value);
973    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
974
975    sprintf(buf, LLD, max_value);
976    assert(buf[strlen(buf)-1] != '9');
977    buf[strlen(buf)-1]++;
978    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
979
980    sprintf(buf, LLD, min_value);
981    assert(buf[strlen(buf)-1] != '9');
982    buf[strlen(buf)-1]++;
983    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
984  }
985#endif
986#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
987  {
988    unsigned long long v;
989    long long v2;
990    static const unsigned long long max_value = 0xffffffffffffffffULL;
991    char buf[32];  // definitely big enough for a unsigned long long
992
993    CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
994    CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
995
996    sprintf(buf, LLU, max_value);
997    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
998
999    assert(buf[strlen(buf)-1] != '9');
1000    buf[strlen(buf)-1]++;
1001    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1002  }
1003#endif
1004  {
1005    float v;
1006    CHECK(RE("(.*)").FullMatch("100", &v));
1007    CHECK(RE("(.*)").FullMatch("-100.", &v));
1008    CHECK(RE("(.*)").FullMatch("1e23", &v));
1009  }
1010  {
1011    double v;
1012    CHECK(RE("(.*)").FullMatch("100", &v));
1013    CHECK(RE("(.*)").FullMatch("-100.", &v));
1014    CHECK(RE("(.*)").FullMatch("1e23", &v));
1015  }
1016
1017  // Check that matching is fully anchored
1018  CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1019  CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1020  CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1021  CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1022
1023  // Braces
1024  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1025  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1026  CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1027
1028  // Complicated RE
1029  CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1030  CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1031  CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1032  CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1033
1034  // Check full-match handling (needs '$' tacked on internally)
1035  CHECK(RE("fo|foo").FullMatch("fo"));
1036  CHECK(RE("fo|foo").FullMatch("foo"));
1037  CHECK(RE("fo|foo$").FullMatch("fo"));
1038  CHECK(RE("fo|foo$").FullMatch("foo"));
1039  CHECK(RE("foo$").FullMatch("foo"));
1040  CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1041  CHECK(!RE("fo|bar").FullMatch("fox"));
1042
1043  // Uncomment the following if we change the handling of '$' to
1044  // prevent it from matching a trailing newline
1045  if (false) {
1046    // Check that we don't get bitten by pcre's special handling of a
1047    // '\n' at the end of the string matching '$'
1048    CHECK(!RE("foo$").PartialMatch("foo\n"));
1049  }
1050
1051  // Number of args
1052  int a[16];
1053  CHECK(RE("").FullMatch(""));
1054
1055  memset(a, 0, sizeof(0));
1056  CHECK(RE("(\\d){1}").FullMatch("1",
1057                                 &a[0]));
1058  CHECK_EQ(a[0], 1);
1059
1060  memset(a, 0, sizeof(0));
1061  CHECK(RE("(\\d)(\\d)").FullMatch("12",
1062                                   &a[0],  &a[1]));
1063  CHECK_EQ(a[0], 1);
1064  CHECK_EQ(a[1], 2);
1065
1066  memset(a, 0, sizeof(0));
1067  CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1068                                        &a[0],  &a[1],  &a[2]));
1069  CHECK_EQ(a[0], 1);
1070  CHECK_EQ(a[1], 2);
1071  CHECK_EQ(a[2], 3);
1072
1073  memset(a, 0, sizeof(0));
1074  CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1075                                             &a[0],  &a[1],  &a[2],  &a[3]));
1076  CHECK_EQ(a[0], 1);
1077  CHECK_EQ(a[1], 2);
1078  CHECK_EQ(a[2], 3);
1079  CHECK_EQ(a[3], 4);
1080
1081  memset(a, 0, sizeof(0));
1082  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1083                                                  &a[0],  &a[1],  &a[2],
1084                                                  &a[3],  &a[4]));
1085  CHECK_EQ(a[0], 1);
1086  CHECK_EQ(a[1], 2);
1087  CHECK_EQ(a[2], 3);
1088  CHECK_EQ(a[3], 4);
1089  CHECK_EQ(a[4], 5);
1090
1091  memset(a, 0, sizeof(0));
1092  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1093                                                       &a[0],  &a[1],  &a[2],
1094                                                       &a[3],  &a[4],  &a[5]));
1095  CHECK_EQ(a[0], 1);
1096  CHECK_EQ(a[1], 2);
1097  CHECK_EQ(a[2], 3);
1098  CHECK_EQ(a[3], 4);
1099  CHECK_EQ(a[4], 5);
1100  CHECK_EQ(a[5], 6);
1101
1102  memset(a, 0, sizeof(0));
1103  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1104                                                            &a[0],  &a[1],  &a[2],  &a[3],
1105                                                            &a[4],  &a[5],  &a[6]));
1106  CHECK_EQ(a[0], 1);
1107  CHECK_EQ(a[1], 2);
1108  CHECK_EQ(a[2], 3);
1109  CHECK_EQ(a[3], 4);
1110  CHECK_EQ(a[4], 5);
1111  CHECK_EQ(a[5], 6);
1112  CHECK_EQ(a[6], 7);
1113
1114  memset(a, 0, sizeof(0));
1115  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1116           "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1117               "1234567890123456",
1118               &a[0],  &a[1],  &a[2],  &a[3],
1119               &a[4],  &a[5],  &a[6],  &a[7],
1120               &a[8],  &a[9],  &a[10], &a[11],
1121               &a[12], &a[13], &a[14], &a[15]));
1122  CHECK_EQ(a[0], 1);
1123  CHECK_EQ(a[1], 2);
1124  CHECK_EQ(a[2], 3);
1125  CHECK_EQ(a[3], 4);
1126  CHECK_EQ(a[4], 5);
1127  CHECK_EQ(a[5], 6);
1128  CHECK_EQ(a[6], 7);
1129  CHECK_EQ(a[7], 8);
1130  CHECK_EQ(a[8], 9);
1131  CHECK_EQ(a[9], 0);
1132  CHECK_EQ(a[10], 1);
1133  CHECK_EQ(a[11], 2);
1134  CHECK_EQ(a[12], 3);
1135  CHECK_EQ(a[13], 4);
1136  CHECK_EQ(a[14], 5);
1137  CHECK_EQ(a[15], 6);
1138
1139  /***** PartialMatch *****/
1140
1141  printf("Testing PartialMatch\n");
1142
1143  CHECK(RE("h.*o").PartialMatch("hello"));
1144  CHECK(RE("h.*o").PartialMatch("othello"));
1145  CHECK(RE("h.*o").PartialMatch("hello!"));
1146  CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1147
1148  /***** other tests *****/
1149
1150  RadixTests();
1151  TestReplace();
1152  TestExtract();
1153  TestConsume();
1154  TestFindAndConsume();
1155  TestQuoteMetaAll();
1156  TestMatchNumberPeculiarity();
1157
1158  // Check the pattern() accessor
1159  {
1160    const string kPattern = "http://([^/]+)/.*";
1161    const RE re(kPattern);
1162    CHECK_EQ(kPattern, re.pattern());
1163  }
1164
1165  // Check RE error field.
1166  {
1167    RE re("foo");
1168    CHECK(re.error().empty());  // Must have no error
1169  }
1170
1171#ifdef SUPPORT_UTF8
1172  // Check UTF-8 handling
1173  {
1174    printf("Testing UTF-8 handling\n");
1175
1176    // Three Japanese characters (nihongo)
1177    const unsigned char utf8_string[] = {
1178         0xe6, 0x97, 0xa5, // 65e5
1179         0xe6, 0x9c, 0xac, // 627c
1180         0xe8, 0xaa, 0x9e, // 8a9e
1181         0
1182    };
1183    const unsigned char utf8_pattern[] = {
1184         '.',
1185         0xe6, 0x9c, 0xac, // 627c
1186         '.',
1187         0
1188    };
1189
1190    // Both should match in either mode, bytes or UTF-8
1191    RE re_test1(".........");
1192    CHECK(re_test1.FullMatch(utf8_string));
1193    RE re_test2("...", pcrecpp::UTF8());
1194    CHECK(re_test2.FullMatch(utf8_string));
1195
1196    // Check that '.' matches one byte or UTF-8 character
1197    // according to the mode.
1198    string ss;
1199    RE re_test3("(.)");
1200    CHECK(re_test3.PartialMatch(utf8_string, &ss));
1201    CHECK_EQ(ss, string("\xe6"));
1202    RE re_test4("(.)", pcrecpp::UTF8());
1203    CHECK(re_test4.PartialMatch(utf8_string, &ss));
1204    CHECK_EQ(ss, string("\xe6\x97\xa5"));
1205
1206    // Check that string matches itself in either mode
1207    RE re_test5(utf8_string);
1208    CHECK(re_test5.FullMatch(utf8_string));
1209    RE re_test6(utf8_string, pcrecpp::UTF8());
1210    CHECK(re_test6.FullMatch(utf8_string));
1211
1212    // Check that pattern matches string only in UTF8 mode
1213    RE re_test7(utf8_pattern);
1214    CHECK(!re_test7.FullMatch(utf8_string));
1215    RE re_test8(utf8_pattern, pcrecpp::UTF8());
1216    CHECK(re_test8.FullMatch(utf8_string));
1217  }
1218
1219  // Check that ungreedy, UTF8 regular expressions don't match when they
1220  // oughtn't -- see bug 82246.
1221  {
1222    // This code always worked.
1223    const char* pattern = "\\w+X";
1224    const string target = "a aX";
1225    RE match_sentence(pattern);
1226    RE match_sentence_re(pattern, pcrecpp::UTF8());
1227
1228    CHECK(!match_sentence.FullMatch(target));
1229    CHECK(!match_sentence_re.FullMatch(target));
1230  }
1231
1232  {
1233    const char* pattern = "(?U)\\w+X";
1234    const string target = "a aX";
1235    RE match_sentence(pattern);
1236    RE match_sentence_re(pattern, pcrecpp::UTF8());
1237
1238    CHECK(!match_sentence.FullMatch(target));
1239    CHECK(!match_sentence_re.FullMatch(target));
1240  }
1241#endif  /* def SUPPORT_UTF8 */
1242
1243  printf("Testing error reporting\n");
1244
1245  { RE re("a\\1"); CHECK(!re.error().empty()); }
1246  {
1247    RE re("a[x");
1248    CHECK(!re.error().empty());
1249  }
1250  {
1251    RE re("a[z-a]");
1252    CHECK(!re.error().empty());
1253  }
1254  {
1255    RE re("a[[:foobar:]]");
1256    CHECK(!re.error().empty());
1257  }
1258  {
1259    RE re("a(b");
1260    CHECK(!re.error().empty());
1261  }
1262  {
1263    RE re("a\\");
1264    CHECK(!re.error().empty());
1265  }
1266
1267  // Test that recursion is stopped
1268  TestRecursion();
1269
1270  // Test Options
1271  if (getenv("VERBOSE_TEST") != NULL)
1272    VERBOSE_TEST  = true;
1273  TestOptions();
1274
1275  // Test the constructors
1276  TestConstructors();
1277
1278  // Done
1279  printf("OK\n");
1280
1281  return 0;
1282}
1283