1// -*- coding: utf-8 -*-
2//
3// Copyright (c) 2005 - 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//
32// Author: Sanjay Ghemawat
33//
34// TODO: Test extractions for PartialMatch/Consume
35
36#ifdef HAVE_CONFIG_H
37#include "config.h"
38#endif
39
40#include <stdio.h>
41#include <string.h>      /* for memset and strcmp */
42#include <cassert>
43#include <vector>
44#include "pcrecpp.h"
45
46using pcrecpp::StringPiece;
47using pcrecpp::RE;
48using pcrecpp::RE_Options;
49using pcrecpp::Hex;
50using pcrecpp::Octal;
51using pcrecpp::CRadix;
52
// When true, the option tests additionally print a description of the
// pattern/subject pair being tried, not just the option name.
static bool VERBOSE_TEST  = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file, line and text of the condition are written to stderr
// and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `flags & mask` or `c ? x : y`) is compared as a whole instead of being
// re-associated around the ==.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
68
69static void Timing1(int num_iters) {
70  // Same pattern lots of times
71  RE pattern("ruby:\\d+");
72  StringPiece p("ruby:1234");
73  for (int j = num_iters; j > 0; j--) {
74    CHECK(pattern.FullMatch(p));
75  }
76}
77
78static void Timing2(int num_iters) {
79  // Same pattern lots of times
80  RE pattern("ruby:(\\d+)");
81  int i;
82  for (int j = num_iters; j > 0; j--) {
83    CHECK(pattern.FullMatch("ruby:1234", &i));
84    CHECK_EQ(i, 1234);
85  }
86}
87
88static void Timing3(int num_iters) {
89  string text_string;
90  for (int j = num_iters; j > 0; j--) {
91    text_string += "this is another line\n";
92  }
93
94  RE line_matcher(".*\n");
95  string line;
96  StringPiece text(text_string);
97  int counter = 0;
98  while (line_matcher.Consume(&text)) {
99    counter++;
100  }
101  printf("Matched %d lines\n", counter);
102}
103
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by constructing 100000 distinct REs and comparing
// the process size reported by VirtualProcessSize() at the half-way point with
// the size at the end.  Fails if the second half grew the process by 2% or
// more of its final size.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around to
  // a huge value (and a bogus failure) if the process shrank.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);       // Allow < 2% growth
}

#endif
126
127static void RadixTests() {
128  printf("Testing hex\n");
129
130#define CHECK_HEX(type, value) \
131  do { \
132    type v; \
133    CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134    CHECK_EQ(v, 0x ## value); \
135    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136    CHECK_EQ(v, 0x ## value); \
137  } while(0)
138
139  CHECK_HEX(short,              2bad);
140  CHECK_HEX(unsigned short,     2badU);
141  CHECK_HEX(int,                dead);
142  CHECK_HEX(unsigned int,       deadU);
143  CHECK_HEX(long,               7eadbeefL);
144  CHECK_HEX(unsigned long,      deadbeefUL);
145#ifdef HAVE_LONG_LONG
146  CHECK_HEX(long long,          12345678deadbeefLL);
147#endif
148#ifdef HAVE_UNSIGNED_LONG_LONG
149  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150#endif
151
152#undef CHECK_HEX
153
154  printf("Testing octal\n");
155
156#define CHECK_OCTAL(type, value) \
157  do { \
158    type v; \
159    CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160    CHECK_EQ(v, 0 ## value); \
161    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162    CHECK_EQ(v, 0 ## value); \
163  } while(0)
164
165  CHECK_OCTAL(short,              77777);
166  CHECK_OCTAL(unsigned short,     177777U);
167  CHECK_OCTAL(int,                17777777777);
168  CHECK_OCTAL(unsigned int,       37777777777U);
169  CHECK_OCTAL(long,               17777777777L);
170  CHECK_OCTAL(unsigned long,      37777777777UL);
171#ifdef HAVE_LONG_LONG
172  CHECK_OCTAL(long long,          777777777777777777777LL);
173#endif
174#ifdef HAVE_UNSIGNED_LONG_LONG
175  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176#endif
177
178#undef CHECK_OCTAL
179
180  printf("Testing decimal\n");
181
182#define CHECK_DECIMAL(type, value) \
183  do { \
184    type v; \
185    CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186    CHECK_EQ(v, value); \
187    CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188    CHECK_EQ(v, value); \
189  } while(0)
190
191  CHECK_DECIMAL(short,              -1);
192  CHECK_DECIMAL(unsigned short,     9999);
193  CHECK_DECIMAL(int,                -1000);
194  CHECK_DECIMAL(unsigned int,       12345U);
195  CHECK_DECIMAL(long,               -10000000L);
196  CHECK_DECIMAL(unsigned long,      3083324652U);
197#ifdef HAVE_LONG_LONG
198  CHECK_DECIMAL(long long,          -100000000000000LL);
199#endif
200#ifdef HAVE_UNSIGNED_LONG_LONG
201  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202#endif
203
204#undef CHECK_DECIMAL
205
206}
207
208static void TestReplace() {
209  printf("Testing Replace\n");
210
211  struct ReplaceTest {
212    const char *regexp;
213    const char *rewrite;
214    const char *original;
215    const char *single;
216    const char *global;
217    int global_count;         // the expected return value from ReplaceAll
218  };
219  static const ReplaceTest tests[] = {
220    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221      "\\2\\1ay",
222      "the quick brown fox jumps over the lazy dogs.",
223      "ethay quick brown fox jumps over the lazy dogs.",
224      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225      9 },
226    { "\\w+",
227      "\\0-NOSPAM",
228      "paul.haahr@google.com",
229      "paul-NOSPAM.haahr@google.com",
230      "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231      4 },
232    { "^",
233      "(START)",
234      "foo",
235      "(START)foo",
236      "(START)foo",
237      1 },
238    { "^",
239      "(START)",
240      "",
241      "(START)",
242      "(START)",
243      1 },
244    { "$",
245      "(END)",
246      "",
247      "(END)",
248      "(END)",
249      1 },
250    { "b",
251      "bb",
252      "ababababab",
253      "abbabababab",
254      "abbabbabbabbabb",
255       5 },
256    { "b",
257      "bb",
258      "bbbbbb",
259      "bbbbbbb",
260      "bbbbbbbbbbbb",
261      6 },
262    { "b+",
263      "bb",
264      "bbbbbb",
265      "bb",
266      "bb",
267      1 },
268    { "b*",
269      "bb",
270      "bbbbbb",
271      "bb",
272      "bbbb",
273      2 },
274    { "b*",
275      "bb",
276      "aaaaa",
277      "bbaaaaa",
278      "bbabbabbabbabbabb",
279      6 },
280    { "b*",
281      "bb",
282      "aa\naa\n",
283      "bbaa\naa\n",
284      "bbabbabb\nbbabbabb\nbb",
285      7 },
286    { "b*",
287      "bb",
288      "aa\raa\r",
289      "bbaa\raa\r",
290      "bbabbabb\rbbabbabb\rbb",
291      7 },
292    { "b*",
293      "bb",
294      "aa\r\naa\r\n",
295      "bbaa\r\naa\r\n",
296      "bbabbabb\r\nbbabbabb\r\nbb",
297      7 },
298    // Check empty-string matching (it's tricky!)
299    { "aa|b*",
300      "@",
301      "aa",
302      "@",
303      "@@",
304      2 },
305    { "b*|aa",
306      "@",
307      "aa",
308      "@aa",
309      "@@@",
310      3 },
311#ifdef SUPPORT_UTF8
312    { "b*",
313      "bb",
314      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
315      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317      5 },
318    { "b*",
319      "bb",
320      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
321      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323       "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324      9 },
325#endif
326    { "", NULL, NULL, NULL, NULL, 0 }
327  };
328
329#ifdef SUPPORT_UTF8
330  const bool support_utf8 = true;
331#else
332  const bool support_utf8 = false;
333#endif
334
335  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336    RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337    assert(re.error().empty());
338    string one(t->original);
339    CHECK(re.Replace(t->rewrite, &one));
340    CHECK_EQ(one, t->single);
341    string all(t->original);
342    const int replace_count = re.GlobalReplace(t->rewrite, &all);
343    CHECK_EQ(all, t->global);
344    CHECK_EQ(replace_count, t->global_count);
345  }
346
347  // One final test: test \r\n replacement when we're not in CRLF mode
348  {
349    RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350    assert(re.error().empty());
351    string all("aa\r\naa\r\n");
352    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354  }
355  {
356    RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357    assert(re.error().empty());
358    string all("aa\r\naa\r\n");
359    CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361  }
362  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363  //       Alas, the answer depends on how pcre was compiled.
364}
365
366static void TestExtract() {
367  printf("Testing Extract\n");
368
369  string s;
370
371  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372  CHECK_EQ(s, "kremvax!boris");
373
374  // check the RE interface as well
375  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376  CHECK_EQ(s, "'foo'");
377  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378  CHECK_EQ(s, "'foo'");
379}
380
381static void TestConsume() {
382  printf("Testing Consume\n");
383
384  string word;
385
386  string s("   aaa b!@#$@#$cccc");
387  StringPiece input(s);
388
389  RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
390  CHECK(r.Consume(&input, &word));
391  CHECK_EQ(word, "aaa");
392  CHECK(r.Consume(&input, &word));
393  CHECK_EQ(word, "b");
394  CHECK(! r.Consume(&input, &word));
395}
396
397static void TestFindAndConsume() {
398  printf("Testing FindAndConsume\n");
399
400  string word;
401
402  string s("   aaa b!@#$@#$cccc");
403  StringPiece input(s);
404
405  RE r("(\\w+)");      // matches a word
406  CHECK(r.FindAndConsume(&input, &word));
407  CHECK_EQ(word, "aaa");
408  CHECK(r.FindAndConsume(&input, &word));
409  CHECK_EQ(word, "b");
410  CHECK(r.FindAndConsume(&input, &word));
411  CHECK_EQ(word, "cccc");
412  CHECK(! r.FindAndConsume(&input, &word));
413}
414
415static void TestMatchNumberPeculiarity() {
416  printf("Testing match-number peculiarity\n");
417
418  string word1;
419  string word2;
420  string word3;
421
422  RE r("(foo)|(bar)|(baz)");
423  CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424  CHECK_EQ(word1, "foo");
425  CHECK_EQ(word2, "");
426  CHECK_EQ(word3, "");
427  CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428  CHECK_EQ(word1, "");
429  CHECK_EQ(word2, "bar");
430  CHECK_EQ(word3, "");
431  CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432  CHECK_EQ(word1, "");
433  CHECK_EQ(word2, "");
434  CHECK_EQ(word3, "baz");
435  CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436
437  string a;
438  CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439  CHECK_EQ(a, "");
440}
441
442static void TestRecursion() {
443  printf("Testing recursion\n");
444
445  // Get one string that passes (sometimes), one that never does.
446  string text_good("abcdefghijk");
447  string text_bad("acdefghijkl");
448
449  // According to pcretest, matching text_good against (\w+)*b
450  // requires match_limit of at least 8192, and match_recursion_limit
451  // of at least 37.
452
453  RE_Options options_ml;
454  options_ml.set_match_limit(8192);
455  RE re("(\\w+)*b", options_ml);
456  CHECK(re.PartialMatch(text_good) == true);
457  CHECK(re.PartialMatch(text_bad) == false);
458  CHECK(re.FullMatch(text_good) == false);
459  CHECK(re.FullMatch(text_bad) == false);
460
461  options_ml.set_match_limit(1024);
462  RE re2("(\\w+)*b", options_ml);
463  CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
464  CHECK(re2.PartialMatch(text_bad) == false);
465  CHECK(re2.FullMatch(text_good) == false);
466  CHECK(re2.FullMatch(text_bad) == false);
467
468  RE_Options options_mlr;
469  options_mlr.set_match_limit_recursion(50);
470  RE re3("(\\w+)*b", options_mlr);
471  CHECK(re3.PartialMatch(text_good) == true);
472  CHECK(re3.PartialMatch(text_bad) == false);
473  CHECK(re3.FullMatch(text_good) == false);
474  CHECK(re3.FullMatch(text_bad) == false);
475
476  options_mlr.set_match_limit_recursion(10);
477  RE re4("(\\w+)*b", options_mlr);
478  CHECK(re4.PartialMatch(text_good) == false);
479  CHECK(re4.PartialMatch(text_bad) == false);
480  CHECK(re4.FullMatch(text_good) == false);
481  CHECK(re4.FullMatch(text_bad) == false);
482}
483
484// A meta-quoted string, interpreted as a pattern, should always match
485// the original unquoted string.
486static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487  string quoted = RE::QuoteMeta(unquoted);
488  RE re(quoted, options);
489  CHECK(re.FullMatch(unquoted));
490}
491
492// A string containing meaningful regexp characters, which is then meta-
493// quoted, should not generally match a string the unquoted string does.
494static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495                                  RE_Options options = RE_Options()) {
496  string quoted = RE::QuoteMeta(unquoted);
497  RE re(quoted, options);
498  CHECK(!re.FullMatch(should_not_match));
499}
500
501// Tests that quoted meta characters match their original strings,
502// and that a few things that shouldn't match indeed do not.
503static void TestQuotaMetaSimple() {
504  TestQuoteMeta("foo");
505  TestQuoteMeta("foo.bar");
506  TestQuoteMeta("foo\\.bar");
507  TestQuoteMeta("[1-9]");
508  TestQuoteMeta("1.5-2.0?");
509  TestQuoteMeta("\\d");
510  TestQuoteMeta("Who doesn't like ice cream?");
511  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512  TestQuoteMeta("((?!)xxx).*yyy");
513  TestQuoteMeta("([");
514  TestQuoteMeta(string("foo\0bar", 7));
515}
516
517static void TestQuoteMetaSimpleNegative() {
518  NegativeTestQuoteMeta("foo", "bar");
519  NegativeTestQuoteMeta("...", "bar");
520  NegativeTestQuoteMeta("\\.", ".");
521  NegativeTestQuoteMeta("\\.", "..");
522  NegativeTestQuoteMeta("(a)", "a");
523  NegativeTestQuoteMeta("(a|b)", "a");
524  NegativeTestQuoteMeta("(a|b)", "(a)");
525  NegativeTestQuoteMeta("(a|b)", "a|b");
526  NegativeTestQuoteMeta("[0-9]", "0");
527  NegativeTestQuoteMeta("[0-9]", "0-9");
528  NegativeTestQuoteMeta("[0-9]", "[9]");
529  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530}
531
532static void TestQuoteMetaLatin1() {
533  TestQuoteMeta("3\xb2 = 9");
534}
535
// QuoteMeta checks on multi-byte UTF-8 sequences, run in UTF-8 mode.  Compiled
// to nothing unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();

  // 'c' is spelled \x63 so that it is not read as part of the \xa1 escape.
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  // No fancy utf8
  TestQuoteMeta("xyz", utf8_mode);
  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("\xc2\xb0", utf8_mode);
  // As a middle character
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);
  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);
  // 4-byte utf8 (music note)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);
  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");
  // 2-byte utf (degree symbol): the quoted form must not match a subject in
  // which each byte of the sequence is preceded by a literal backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
550
551static void TestQuoteMetaAll() {
552  printf("Testing QuoteMeta\n");
553  TestQuotaMetaSimple();
554  TestQuoteMetaSimpleNegative();
555  TestQuoteMetaLatin1();
556  TestQuoteMetaUtf8();
557}
558
559//
560// Options tests contributed by
561// Giuseppe Maxia, CTO, Stardata s.r.l.
562// July 2005
563//
564static void GetOneOptionResult(
565                const char *option_name,
566                const char *regex,
567                const char *str,
568                RE_Options options,
569                bool full,
570                string expected) {
571
572  printf("Testing Option <%s>\n", option_name);
573  if(VERBOSE_TEST)
574    printf("/%s/ finds \"%s\" within \"%s\" \n",
575                    regex,
576                    expected.c_str(),
577                    str);
578  string captured("");
579  if (full)
580    RE(regex,options).FullMatch(str, &captured);
581  else
582    RE(regex,options).PartialMatch(str, &captured);
583  CHECK_EQ(captured, expected);
584}
585
586static void TestOneOption(
587                const char *option_name,
588                const char *regex,
589                const char *str,
590                RE_Options options,
591                bool full,
592                bool assertive = true) {
593
594  printf("Testing Option <%s>\n", option_name);
595  if (VERBOSE_TEST)
596    printf("'%s' %s /%s/ \n",
597                  str,
598                  (assertive? "matches" : "doesn't match"),
599                  regex);
600  if (assertive) {
601    if (full)
602      CHECK(RE(regex,options).FullMatch(str));
603    else
604      CHECK(RE(regex,options).PartialMatch(str));
605  } else {
606    if (full)
607      CHECK(!RE(regex,options).FullMatch(str));
608    else
609      CHECK(!RE(regex,options).PartialMatch(str));
610  }
611}
612
613static void Test_CASELESS() {
614  RE_Options options;
615  RE_Options options2;
616
617  options.set_caseless(true);
618  TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
619  TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
620  TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
621
622  TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
623  TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624  options.set_caseless(false);
625  TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
626}
627
628static void Test_MULTILINE() {
629  RE_Options options;
630  RE_Options options2;
631  const char *str = "HELLO\n" "cruel\n" "world\n";
632
633  options.set_multiline(true);
634  TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
635  TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
636  TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637  options.set_multiline(false);
638  TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639}
640
641static void Test_DOTALL() {
642  RE_Options options;
643  RE_Options options2;
644  const char *str = "HELLO\n" "cruel\n" "world";
645
646  options.set_dotall(true);
647  TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
648  TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
649  TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
650  options.set_dotall(false);
651  TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652}
653
654static void Test_DOLLAR_ENDONLY() {
655  RE_Options options;
656  RE_Options options2;
657  const char *str = "HELLO world\n";
658
659  TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660  options.set_dollar_endonly(true);
661  TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
662  TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
663}
664
665static void Test_EXTRA() {
666  RE_Options options;
667  const char *str = "HELLO";
668
669  options.set_extra(true);
670  TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671  TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672  options.set_extra(false);
673  TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674}
675
676static void Test_EXTENDED() {
677  RE_Options options;
678  RE_Options options2;
679  const char *str = "HELLO world";
680
681  options.set_extended(true);
682  TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
683  TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
684  TestOneOption("EXTENDED (class)",
685                    "^ HE L{2} O "
686                    "\\s+        "
687                    "\\w+ $      ",
688                    str,
689                    options,
690                    false);
691
692  TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693  TestOneOption("EXTENDED (function)",
694                    "^ HE L{2} O "
695                    "\\s+        "
696                    "\\w+ $      ",
697                    str,
698                    pcrecpp::EXTENDED(),
699                    false);
700
701  options.set_extended(false);
702  TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703}
704
705static void Test_NO_AUTO_CAPTURE() {
706  RE_Options options;
707  const char *str = "HELLO world";
708  string captured;
709
710  printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711  if (VERBOSE_TEST)
712    printf("parentheses capture text\n");
713  RE re("(world|universe)$", options);
714  CHECK(re.Extract("\\1", str , &captured));
715  CHECK_EQ(captured, "world");
716  options.set_no_auto_capture(true);
717  printf("testing Option <NO_AUTO_CAPTURE>\n");
718  if (VERBOSE_TEST)
719    printf("parentheses do not capture text\n");
720  re.Extract("\\1",str, &captured );
721  CHECK_EQ(captured, "world");
722}
723
724static void Test_UNGREEDY() {
725  RE_Options options;
726  const char *str = "HELLO, 'this' is the 'world'";
727
728  options.set_ungreedy(true);
729  GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730  GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731  GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732
733  options.set_ungreedy(false);
734  GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735  GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736}
737
738static void Test_all_options() {
739  const char *str = "HELLO\n" "cruel\n" "world";
740  RE_Options options;
741  options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742
743  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744  options.set_all_options(0);
745  TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746  options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747
748  TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750                  " ^ c r u e l $ ",
751                  str,
752                  RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753                  false);
754
755  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756                  " ^ c r u e l $ ",
757                  str,
758                  RE_Options()
759                       .set_multiline(true)
760                       .set_extended(true),
761                  false);
762
763  options.set_all_options(0);
764  TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765
766}
767
768static void TestOptions() {
769  printf("Testing Options\n");
770  Test_CASELESS();
771  Test_MULTILINE();
772  Test_DOTALL();
773  Test_DOLLAR_ENDONLY();
774  Test_EXTENDED();
775  Test_NO_AUTO_CAPTURE();
776  Test_UNGREEDY();
777  Test_EXTRA();
778  Test_all_options();
779}
780
781static void TestConstructors() {
782  printf("Testing constructors\n");
783
784  RE_Options options;
785  options.set_dotall(true);
786  const char *str = "HELLO\n" "cruel\n" "world";
787
788  RE orig("HELLO.*world", options);
789  CHECK(orig.FullMatch(str));
790
791  RE copy1(orig);
792  CHECK(copy1.FullMatch(str));
793
794  RE copy2("not a match");
795  CHECK(!copy2.FullMatch(str));
796  copy2 = copy1;
797  CHECK(copy2.FullMatch(str));
798  copy2 = orig;
799  CHECK(copy2.FullMatch(str));
800
801  // Make sure when we assign to ourselves, nothing bad happens
802  orig = orig;
803  copy1 = copy1;
804  copy2 = copy2;
805  CHECK(orig.FullMatch(str));
806  CHECK(copy1.FullMatch(str));
807  CHECK(copy2.FullMatch(str));
808}
809
810int main(int argc, char** argv) {
811  // Treat any flag as --help
812  if (argc > 1 && argv[1][0] == '-') {
813    printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814           "       If 'timingX ###' is specified, run the given timing test\n"
815           "       with the given number of iterations, rather than running\n"
816           "       the default corectness test.\n", argv[0]);
817    return 0;
818  }
819
820  if (argc > 1) {
821    if ( argc == 2 || atoi(argv[2]) == 0) {
822      printf("timing mode needs a num-iters argument\n");
823      return 1;
824    }
825    if (!strcmp(argv[1], "timing1"))
826      Timing1(atoi(argv[2]));
827    else if (!strcmp(argv[1], "timing2"))
828      Timing2(atoi(argv[2]));
829    else if (!strcmp(argv[1], "timing3"))
830      Timing3(atoi(argv[2]));
831    else
832      printf("Unknown argument '%s'\n", argv[1]);
833    return 0;
834  }
835
836  printf("PCRE C++ wrapper tests\n");
837  printf("Testing FullMatch\n");
838
839  int i;
840  string s;
841
842  /***** FullMatch with no args *****/
843
844  CHECK(RE("h.*o").FullMatch("hello"));
845  CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
846  CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
847  CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
848  CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
849  CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
850
851  /***** FullMatch with args *****/
852
853  // Zero-arg
854  CHECK(RE("\\d+").FullMatch("1001"));
855
856  // Single-arg
857  CHECK(RE("(\\d+)").FullMatch("1001",   &i));
858  CHECK_EQ(i, 1001);
859  CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860  CHECK_EQ(i, -123);
861  CHECK(!RE("()\\d+").FullMatch("10", &i));
862  CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863                                &i));
864
865  // Digits surrounding integer-arg
866  CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867  CHECK_EQ(i, 23);
868  CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869  CHECK_EQ(i, 1);
870  CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871  CHECK_EQ(i, -1);
872  CHECK(RE("(\\d)").PartialMatch("1234", &i));
873  CHECK_EQ(i, 1);
874  CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875  CHECK_EQ(i, -1);
876
877  // String-arg
878  CHECK(RE("h(.*)o").FullMatch("hello", &s));
879  CHECK_EQ(s, string("ell"));
880
881  // StringPiece-arg
882  StringPiece sp;
883  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884  CHECK_EQ(sp.size(), 4);
885  CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886  CHECK_EQ(i, 1234);
887
888  // Multi-arg
889  CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890  CHECK_EQ(s, string("ruby"));
891  CHECK_EQ(i, 1234);
892
893  // Ignore non-void* NULL arg
894  CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895  CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896  CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897  CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898#ifdef HAVE_LONG_LONG
899  CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900#endif
901  CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902  CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
903
904  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905  CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906  CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907  CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908  CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909  CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910
911  // Ignored arg
912  CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913  CHECK_EQ(s, string("ruby"));
914  CHECK_EQ(i, 1234);
915
916  // Type tests
917  {
918    char c;
919    CHECK(RE("(H)ello").FullMatch("Hello", &c));
920    CHECK_EQ(c, 'H');
921  }
922  {
923    unsigned char c;
924    CHECK(RE("(H)ello").FullMatch("Hello", &c));
925    CHECK_EQ(c, static_cast<unsigned char>('H'));
926  }
927  {
928    short v;
929    CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
930    CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
931    CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
932    CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
933    CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934    CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
935  }
936  {
937    unsigned short v;
938    CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
939    CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
940    CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
941    CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
942  }
943  {
944    int v;
945    static const int max_value = 0x7fffffff;
946    static const int min_value = -max_value - 1;
947    CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
948    CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
949    CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
950    CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951    CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952    CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
953  }
954  {
955    unsigned int v;
956    static const unsigned int max_value = 0xfffffffful;
957    CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
958    CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
959    CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
960  }
961#ifdef HAVE_LONG_LONG
962# if defined(__MINGW__) || defined(__MINGW32__)
963#   define LLD "%I64d"
964#   define LLU "%I64u"
965# else
966#   define LLD "%lld"
967#   define LLU "%llu"
968# endif
969  {
970    long long v;
971    static const long long max_value = 0x7fffffffffffffffLL;
972    static const long long min_value = -max_value - 1;
973    char buf[32];  // definitely big enough for a long long
974
975    CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976    CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
977
978    sprintf(buf, LLD, max_value);
979    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
980
981    sprintf(buf, LLD, min_value);
982    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
983
984    sprintf(buf, LLD, max_value);
985    assert(buf[strlen(buf)-1] != '9');
986    buf[strlen(buf)-1]++;
987    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
988
989    sprintf(buf, LLD, min_value);
990    assert(buf[strlen(buf)-1] != '9');
991    buf[strlen(buf)-1]++;
992    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993  }
994#endif
995#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
996  {
997    unsigned long long v;
998    long long v2;
999    static const unsigned long long max_value = 0xffffffffffffffffULL;
    char buf[32];  // definitely big enough for an unsigned long long
1001
1002    CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003    CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1004
1005    sprintf(buf, LLU, max_value);
1006    CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1007
1008    assert(buf[strlen(buf)-1] != '9');
1009    buf[strlen(buf)-1]++;
1010    CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1011  }
1012#endif
1013  {
1014    float v;
1015    CHECK(RE("(.*)").FullMatch("100", &v));
1016    CHECK(RE("(.*)").FullMatch("-100.", &v));
1017    CHECK(RE("(.*)").FullMatch("1e23", &v));
1018  }
1019  {
1020    double v;
1021    CHECK(RE("(.*)").FullMatch("100", &v));
1022    CHECK(RE("(.*)").FullMatch("-100.", &v));
1023    CHECK(RE("(.*)").FullMatch("1e23", &v));
1024  }
1025
1026  // Check that matching is fully anchored
1027  CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1028  CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1029  CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030  CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031
1032  // Braces
1033  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034  CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035  CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036
1037  // Complicated RE
1038  CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039  CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040  CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041  CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1042
1043  // Check full-match handling (needs '$' tacked on internally)
1044  CHECK(RE("fo|foo").FullMatch("fo"));
1045  CHECK(RE("fo|foo").FullMatch("foo"));
1046  CHECK(RE("fo|foo$").FullMatch("fo"));
1047  CHECK(RE("fo|foo$").FullMatch("foo"));
1048  CHECK(RE("foo$").FullMatch("foo"));
1049  CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050  CHECK(!RE("fo|bar").FullMatch("fox"));
1051
1052  // Uncomment the following if we change the handling of '$' to
1053  // prevent it from matching a trailing newline
1054  if (false) {
1055    // Check that we don't get bitten by pcre's special handling of a
1056    // '\n' at the end of the string matching '$'
1057    CHECK(!RE("foo$").PartialMatch("foo\n"));
1058  }
1059
1060  // Number of args
1061  int a[16];
1062  CHECK(RE("").FullMatch(""));
1063
1064  memset(a, 0, sizeof(0));
1065  CHECK(RE("(\\d){1}").FullMatch("1",
1066                                 &a[0]));
1067  CHECK_EQ(a[0], 1);
1068
1069  memset(a, 0, sizeof(0));
1070  CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071                                   &a[0],  &a[1]));
1072  CHECK_EQ(a[0], 1);
1073  CHECK_EQ(a[1], 2);
1074
1075  memset(a, 0, sizeof(0));
1076  CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077                                        &a[0],  &a[1],  &a[2]));
1078  CHECK_EQ(a[0], 1);
1079  CHECK_EQ(a[1], 2);
1080  CHECK_EQ(a[2], 3);
1081
1082  memset(a, 0, sizeof(0));
1083  CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084                                             &a[0],  &a[1],  &a[2],  &a[3]));
1085  CHECK_EQ(a[0], 1);
1086  CHECK_EQ(a[1], 2);
1087  CHECK_EQ(a[2], 3);
1088  CHECK_EQ(a[3], 4);
1089
1090  memset(a, 0, sizeof(0));
1091  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092                                                  &a[0],  &a[1],  &a[2],
1093                                                  &a[3],  &a[4]));
1094  CHECK_EQ(a[0], 1);
1095  CHECK_EQ(a[1], 2);
1096  CHECK_EQ(a[2], 3);
1097  CHECK_EQ(a[3], 4);
1098  CHECK_EQ(a[4], 5);
1099
1100  memset(a, 0, sizeof(0));
1101  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102                                                       &a[0],  &a[1],  &a[2],
1103                                                       &a[3],  &a[4],  &a[5]));
1104  CHECK_EQ(a[0], 1);
1105  CHECK_EQ(a[1], 2);
1106  CHECK_EQ(a[2], 3);
1107  CHECK_EQ(a[3], 4);
1108  CHECK_EQ(a[4], 5);
1109  CHECK_EQ(a[5], 6);
1110
1111  memset(a, 0, sizeof(0));
1112  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113                                                            &a[0],  &a[1],  &a[2],  &a[3],
1114                                                            &a[4],  &a[5],  &a[6]));
1115  CHECK_EQ(a[0], 1);
1116  CHECK_EQ(a[1], 2);
1117  CHECK_EQ(a[2], 3);
1118  CHECK_EQ(a[3], 4);
1119  CHECK_EQ(a[4], 5);
1120  CHECK_EQ(a[5], 6);
1121  CHECK_EQ(a[6], 7);
1122
1123  memset(a, 0, sizeof(0));
1124  CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125           "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126               "1234567890123456",
1127               &a[0],  &a[1],  &a[2],  &a[3],
1128               &a[4],  &a[5],  &a[6],  &a[7],
1129               &a[8],  &a[9],  &a[10], &a[11],
1130               &a[12], &a[13], &a[14], &a[15]));
1131  CHECK_EQ(a[0], 1);
1132  CHECK_EQ(a[1], 2);
1133  CHECK_EQ(a[2], 3);
1134  CHECK_EQ(a[3], 4);
1135  CHECK_EQ(a[4], 5);
1136  CHECK_EQ(a[5], 6);
1137  CHECK_EQ(a[6], 7);
1138  CHECK_EQ(a[7], 8);
1139  CHECK_EQ(a[8], 9);
1140  CHECK_EQ(a[9], 0);
1141  CHECK_EQ(a[10], 1);
1142  CHECK_EQ(a[11], 2);
1143  CHECK_EQ(a[12], 3);
1144  CHECK_EQ(a[13], 4);
1145  CHECK_EQ(a[14], 5);
1146  CHECK_EQ(a[15], 6);
1147
1148  /***** PartialMatch *****/
1149
1150  printf("Testing PartialMatch\n");
1151
1152  CHECK(RE("h.*o").PartialMatch("hello"));
1153  CHECK(RE("h.*o").PartialMatch("othello"));
1154  CHECK(RE("h.*o").PartialMatch("hello!"));
1155  CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1156
1157  /***** other tests *****/
1158
1159  RadixTests();
1160  TestReplace();
1161  TestExtract();
1162  TestConsume();
1163  TestFindAndConsume();
1164  TestQuoteMetaAll();
1165  TestMatchNumberPeculiarity();
1166
1167  // Check the pattern() accessor
1168  {
1169    const string kPattern = "http://([^/]+)/.*";
1170    const RE re(kPattern);
1171    CHECK_EQ(kPattern, re.pattern());
1172  }
1173
1174  // Check RE error field.
1175  {
1176    RE re("foo");
1177    CHECK(re.error().empty());  // Must have no error
1178  }
1179
1180#ifdef SUPPORT_UTF8
1181  // Check UTF-8 handling
1182  {
1183    printf("Testing UTF-8 handling\n");
1184
1185    // Three Japanese characters (nihongo)
1186    const unsigned char utf8_string[] = {
1187         0xe6, 0x97, 0xa5, // 65e5
         0xe6, 0x9c, 0xac, // 672c
1189         0xe8, 0xaa, 0x9e, // 8a9e
1190         0
1191    };
1192    const unsigned char utf8_pattern[] = {
1193         '.',
         0xe6, 0x9c, 0xac, // 672c
1195         '.',
1196         0
1197    };
1198
1199    // Both should match in either mode, bytes or UTF-8
1200    RE re_test1(".........");
1201    CHECK(re_test1.FullMatch(utf8_string));
1202    RE re_test2("...", pcrecpp::UTF8());
1203    CHECK(re_test2.FullMatch(utf8_string));
1204
1205    // Check that '.' matches one byte or UTF-8 character
1206    // according to the mode.
1207    string ss;
1208    RE re_test3("(.)");
1209    CHECK(re_test3.PartialMatch(utf8_string, &ss));
1210    CHECK_EQ(ss, string("\xe6"));
1211    RE re_test4("(.)", pcrecpp::UTF8());
1212    CHECK(re_test4.PartialMatch(utf8_string, &ss));
1213    CHECK_EQ(ss, string("\xe6\x97\xa5"));
1214
1215    // Check that string matches itself in either mode
1216    RE re_test5(utf8_string);
1217    CHECK(re_test5.FullMatch(utf8_string));
1218    RE re_test6(utf8_string, pcrecpp::UTF8());
1219    CHECK(re_test6.FullMatch(utf8_string));
1220
1221    // Check that pattern matches string only in UTF8 mode
1222    RE re_test7(utf8_pattern);
1223    CHECK(!re_test7.FullMatch(utf8_string));
1224    RE re_test8(utf8_pattern, pcrecpp::UTF8());
1225    CHECK(re_test8.FullMatch(utf8_string));
1226  }
1227
1228  // Check that ungreedy, UTF8 regular expressions don't match when they
1229  // oughtn't -- see bug 82246.
1230  {
1231    // This code always worked.
1232    const char* pattern = "\\w+X";
1233    const string target = "a aX";
1234    RE match_sentence(pattern);
1235    RE match_sentence_re(pattern, pcrecpp::UTF8());
1236
1237    CHECK(!match_sentence.FullMatch(target));
1238    CHECK(!match_sentence_re.FullMatch(target));
1239  }
1240
1241  {
1242    const char* pattern = "(?U)\\w+X";
1243    const string target = "a aX";
1244    RE match_sentence(pattern);
1245    RE match_sentence_re(pattern, pcrecpp::UTF8());
1246
1247    CHECK(!match_sentence.FullMatch(target));
1248    CHECK(!match_sentence_re.FullMatch(target));
1249  }
1250#endif  /* def SUPPORT_UTF8 */
1251
1252  printf("Testing error reporting\n");
1253
1254  { RE re("a\\1"); CHECK(!re.error().empty()); }
1255  {
1256    RE re("a[x");
1257    CHECK(!re.error().empty());
1258  }
1259  {
1260    RE re("a[z-a]");
1261    CHECK(!re.error().empty());
1262  }
1263  {
1264    RE re("a[[:foobar:]]");
1265    CHECK(!re.error().empty());
1266  }
1267  {
1268    RE re("a(b");
1269    CHECK(!re.error().empty());
1270  }
1271  {
1272    RE re("a\\");
1273    CHECK(!re.error().empty());
1274  }
1275
1276  // Test that recursion is stopped
1277  TestRecursion();
1278
1279  // Test Options
1280  if (getenv("VERBOSE_TEST") != NULL)
1281    VERBOSE_TEST  = true;
1282  TestOptions();
1283
1284  // Test the constructors
1285  TestConstructors();
1286
1287  // Done
1288  printf("OK\n");
1289
1290  return 0;
1291}
1292