1// -*- coding: utf-8 -*- 2// 3// Copyright (c) 2005 - 2010, Google Inc. 4// All rights reserved. 5// 6// Redistribution and use in source and binary forms, with or without 7// modification, are permitted provided that the following conditions are 8// met: 9// 10// * Redistributions of source code must retain the above copyright 11// notice, this list of conditions and the following disclaimer. 12// * Redistributions in binary form must reproduce the above 13// copyright notice, this list of conditions and the following disclaimer 14// in the documentation and/or other materials provided with the 15// distribution. 16// * Neither the name of Google Inc. nor the names of its 17// contributors may be used to endorse or promote products derived from 18// this software without specific prior written permission. 19// 20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31// 32// Author: Sanjay Ghemawat 33// 34// TODO: Test extractions for PartialMatch/Consume 35 36#ifdef HAVE_CONFIG_H 37#include "config.h" 38#endif 39 40#include <stdio.h> 41#include <string.h> /* for memset and strcmp */ 42#include <cassert> 43#include <vector> 44#include "pcrecpp.h" 45 46using pcrecpp::StringPiece; 47using pcrecpp::RE; 48using pcrecpp::RE_Options; 49using pcrecpp::Hex; 50using pcrecpp::Octal; 51using pcrecpp::CRadix; 52 53static bool VERBOSE_TEST = false; 54 55// CHECK dies with a fatal error if condition is not true. It is *not* 56// controlled by NDEBUG, so the check will be executed regardless of 57// compilation mode. Therefore, it is safe to do things like: 58// CHECK_EQ(fp->Write(x), 4) 59#define CHECK(condition) do { \ 60 if (!(condition)) { \ 61 fprintf(stderr, "%s:%d: Check failed: %s\n", \ 62 __FILE__, __LINE__, #condition); \ 63 exit(1); \ 64 } \ 65} while (0) 66 67#define CHECK_EQ(a, b) CHECK(a == b) 68 69static void Timing1(int num_iters) { 70 // Same pattern lots of times 71 RE pattern("ruby:\\d+"); 72 StringPiece p("ruby:1234"); 73 for (int j = num_iters; j > 0; j--) { 74 CHECK(pattern.FullMatch(p)); 75 } 76} 77 78static void Timing2(int num_iters) { 79 // Same pattern lots of times 80 RE pattern("ruby:(\\d+)"); 81 int i; 82 for (int j = num_iters; j > 0; j--) { 83 CHECK(pattern.FullMatch("ruby:1234", &i)); 84 CHECK_EQ(i, 1234); 85 } 86} 87 88static void Timing3(int num_iters) { 89 string text_string; 90 for (int j = num_iters; j > 0; j--) { 91 text_string += "this is another line\n"; 92 } 93 94 RE line_matcher(".*\n"); 95 string line; 96 StringPiece text(text_string); 97 int counter = 0; 98 while (line_matcher.Consume(&text)) { 99 counter++; 100 } 101 printf("Matched %d lines\n", counter); 102} 103 104#if 0 // uncomment this if you have a way of defining VirtualProcessSize() 105 106static void LeakTest() { 107 // Check for memory leaks 108 unsigned long long initial_size = 0; 109 for (int i = 0; i < 100000; i++) { 110 if (i == 50000) { 111 initial_size = VirtualProcessSize(); 112 printf("Size after 50000: %llu\n", initial_size); 113 } 114 char buf[100]; // definitely big enough 115 sprintf(buf, "pat%09d", i); 116 RE newre(buf); 117 } 118 uint64 final_size = VirtualProcessSize(); 119 printf("Size after 100000: %llu\n", final_size); 120 const double growth = double(final_size - initial_size) / final_size; 121 printf("Growth: %0.2f%%", growth * 100); 122 CHECK(growth < 0.02); // Allow < 2% growth 123} 124 125#endif 126 127static void RadixTests() { 128 printf("Testing hex\n"); 129 130#define CHECK_HEX(type, value) \ 131 do { \ 132 type v; \ 133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ 134 CHECK_EQ(v, 0x ## value); \ 135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ 136 CHECK_EQ(v, 0x ## value); \ 137 } while(0) 138 139 CHECK_HEX(short, 2bad); 140 CHECK_HEX(unsigned short, 2badU); 141 CHECK_HEX(int, dead); 142 CHECK_HEX(unsigned int, deadU); 143 CHECK_HEX(long, 7eadbeefL); 144 CHECK_HEX(unsigned long, deadbeefUL); 145#ifdef HAVE_LONG_LONG 146 CHECK_HEX(long long, 12345678deadbeefLL); 147#endif 148#ifdef HAVE_UNSIGNED_LONG_LONG 149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 150#endif 151 152#undef CHECK_HEX 153 154 printf("Testing octal\n"); 155 156#define CHECK_OCTAL(type, value) \ 157 do { \ 158 type v; \ 159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ 160 CHECK_EQ(v, 0 ## value); \ 161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ 162 CHECK_EQ(v, 0 ## value); \ 163 } while(0) 164 165 CHECK_OCTAL(short, 77777); 166 CHECK_OCTAL(unsigned short, 177777U); 167 CHECK_OCTAL(int, 17777777777); 168 CHECK_OCTAL(unsigned int, 37777777777U); 169 CHECK_OCTAL(long, 17777777777L); 170 CHECK_OCTAL(unsigned long, 37777777777UL); 171#ifdef HAVE_LONG_LONG 172 CHECK_OCTAL(long long, 777777777777777777777LL); 173#endif 174#ifdef HAVE_UNSIGNED_LONG_LONG 175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); 176#endif 177 178#undef CHECK_OCTAL 179 180 printf("Testing decimal\n"); 181 182#define CHECK_DECIMAL(type, value) \ 183 do { \ 184 type v; \ 185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ 186 CHECK_EQ(v, value); \ 187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ 188 CHECK_EQ(v, value); \ 189 } while(0) 190 191 CHECK_DECIMAL(short, -1); 192 CHECK_DECIMAL(unsigned short, 9999); 193 CHECK_DECIMAL(int, -1000); 194 CHECK_DECIMAL(unsigned int, 12345U); 195 CHECK_DECIMAL(long, -10000000L); 196 CHECK_DECIMAL(unsigned long, 3083324652U); 197#ifdef HAVE_LONG_LONG 198 CHECK_DECIMAL(long long, -100000000000000LL); 199#endif 200#ifdef HAVE_UNSIGNED_LONG_LONG 201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 202#endif 203 204#undef CHECK_DECIMAL 205 206} 207 208static void TestReplace() { 209 printf("Testing Replace\n"); 210 211 struct ReplaceTest { 212 const char *regexp; 213 const char *rewrite; 214 const char *original; 215 const char *single; 216 const char *global; 217 int global_count; // the expected return value from ReplaceAll 218 }; 219 static const ReplaceTest tests[] = { 220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 221 "\\2\\1ay", 222 "the quick brown fox jumps over the lazy dogs.", 223 "ethay quick brown fox jumps over the lazy dogs.", 224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 225 9 }, 226 { "\\w+", 227 "\\0-NOSPAM", 228 "paul.haahr@google.com", 229 "paul-NOSPAM.haahr@google.com", 230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", 231 4 }, 232 { "^", 233 "(START)", 234 "foo", 235 "(START)foo", 236 "(START)foo", 237 1 }, 238 { "^", 239 "(START)", 240 "", 241 "(START)", 242 "(START)", 243 1 }, 244 { "$", 245 "(END)", 246 "", 247 "(END)", 248 "(END)", 249 1 }, 250 { "b", 251 "bb", 252 "ababababab", 253 "abbabababab", 254 "abbabbabbabbabb", 255 5 }, 256 { "b", 257 "bb", 258 "bbbbbb", 259 "bbbbbbb", 260 "bbbbbbbbbbbb", 261 6 }, 262 { "b+", 263 "bb", 264 "bbbbbb", 265 "bb", 266 "bb", 267 1 }, 268 { "b*", 269 "bb", 270 "bbbbbb", 271 "bb", 272 "bbbb", 273 2 }, 274 { "b*", 275 "bb", 276 "aaaaa", 277 "bbaaaaa", 278 "bbabbabbabbabbabb", 279 6 }, 280 { "b*", 281 "bb", 282 "aa\naa\n", 283 "bbaa\naa\n", 284 "bbabbabb\nbbabbabb\nbb", 285 7 }, 286 { "b*", 287 "bb", 288 "aa\raa\r", 289 "bbaa\raa\r", 290 "bbabbabb\rbbabbabb\rbb", 291 7 }, 292 { "b*", 293 "bb", 294 "aa\r\naa\r\n", 295 "bbaa\r\naa\r\n", 296 "bbabbabb\r\nbbabbabb\r\nbb", 297 7 }, 298 // Check empty-string matching (it's tricky!) 299 { "aa|b*", 300 "@", 301 "aa", 302 "@", 303 "@@", 304 2 }, 305 { "b*|aa", 306 "@", 307 "aa", 308 "@aa", 309 "@@@", 310 3 }, 311#ifdef SUPPORT_UTF8 312 { "b*", 313 "bb", 314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", 316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", 317 5 }, 318 { "b*", 319 "bb", 320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", 322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" 323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), 324 9 }, 325#endif 326 { "", NULL, NULL, NULL, NULL, 0 } 327 }; 328 329#ifdef SUPPORT_UTF8 330 const bool support_utf8 = true; 331#else 332 const bool support_utf8 = false; 333#endif 334 335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 336 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); 337 assert(re.error().empty()); 338 string one(t->original); 339 CHECK(re.Replace(t->rewrite, &one)); 340 CHECK_EQ(one, t->single); 341 string all(t->original); 342 const int replace_count = re.GlobalReplace(t->rewrite, &all); 343 CHECK_EQ(all, t->global); 344 CHECK_EQ(replace_count, t->global_count); 345 } 346 347 // One final test: test \r\n replacement when we're not in CRLF mode 348 { 349 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); 350 assert(re.error().empty()); 351 string all("aa\r\naa\r\n"); 352 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 353 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 354 } 355 { 356 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); 357 assert(re.error().empty()); 358 string all("aa\r\naa\r\n"); 359 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 360 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 361 } 362 // TODO: test what happens when no PCRE_NEWLINE_* flag is set. 363 // Alas, the answer depends on how pcre was compiled. 364} 365 366static void TestExtract() { 367 printf("Testing Extract\n"); 368 369 string s; 370 371 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); 372 CHECK_EQ(s, "kremvax!boris"); 373 374 // check the RE interface as well 375 CHECK(RE(".*").Extract("'\\0'", "foo", &s)); 376 CHECK_EQ(s, "'foo'"); 377 CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); 378 CHECK_EQ(s, "'foo'"); 379} 380 381static void TestConsume() { 382 printf("Testing Consume\n"); 383 384 string word; 385 386 string s(" aaa b!@#$@#$cccc"); 387 StringPiece input(s); 388 389 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 390 CHECK(r.Consume(&input, &word)); 391 CHECK_EQ(word, "aaa"); 392 CHECK(r.Consume(&input, &word)); 393 CHECK_EQ(word, "b"); 394 CHECK(! r.Consume(&input, &word)); 395} 396 397static void TestFindAndConsume() { 398 printf("Testing FindAndConsume\n"); 399 400 string word; 401 402 string s(" aaa b!@#$@#$cccc"); 403 StringPiece input(s); 404 405 RE r("(\\w+)"); // matches a word 406 CHECK(r.FindAndConsume(&input, &word)); 407 CHECK_EQ(word, "aaa"); 408 CHECK(r.FindAndConsume(&input, &word)); 409 CHECK_EQ(word, "b"); 410 CHECK(r.FindAndConsume(&input, &word)); 411 CHECK_EQ(word, "cccc"); 412 CHECK(! r.FindAndConsume(&input, &word)); 413} 414 415static void TestMatchNumberPeculiarity() { 416 printf("Testing match-number peculiarity\n"); 417 418 string word1; 419 string word2; 420 string word3; 421 422 RE r("(foo)|(bar)|(baz)"); 423 CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); 424 CHECK_EQ(word1, "foo"); 425 CHECK_EQ(word2, ""); 426 CHECK_EQ(word3, ""); 427 CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); 428 CHECK_EQ(word1, ""); 429 CHECK_EQ(word2, "bar"); 430 CHECK_EQ(word3, ""); 431 CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); 432 CHECK_EQ(word1, ""); 433 CHECK_EQ(word2, ""); 434 CHECK_EQ(word3, "baz"); 435 CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); 436 437 string a; 438 CHECK(RE("(foo)|hello").FullMatch("hello", &a)); 439 CHECK_EQ(a, ""); 440} 441 442static void TestRecursion() { 443 printf("Testing recursion\n"); 444 445 // Get one string that passes (sometimes), one that never does. 446 string text_good("abcdefghijk"); 447 string text_bad("acdefghijkl"); 448 449 // According to pcretest, matching text_good against (\w+)*b 450 // requires match_limit of at least 8192, and match_recursion_limit 451 // of at least 37. 452 453 RE_Options options_ml; 454 options_ml.set_match_limit(8192); 455 RE re("(\\w+)*b", options_ml); 456 CHECK(re.PartialMatch(text_good) == true); 457 CHECK(re.PartialMatch(text_bad) == false); 458 CHECK(re.FullMatch(text_good) == false); 459 CHECK(re.FullMatch(text_bad) == false); 460 461 options_ml.set_match_limit(1024); 462 RE re2("(\\w+)*b", options_ml); 463 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit 464 CHECK(re2.PartialMatch(text_bad) == false); 465 CHECK(re2.FullMatch(text_good) == false); 466 CHECK(re2.FullMatch(text_bad) == false); 467 468 RE_Options options_mlr; 469 options_mlr.set_match_limit_recursion(50); 470 RE re3("(\\w+)*b", options_mlr); 471 CHECK(re3.PartialMatch(text_good) == true); 472 CHECK(re3.PartialMatch(text_bad) == false); 473 CHECK(re3.FullMatch(text_good) == false); 474 CHECK(re3.FullMatch(text_bad) == false); 475 476 options_mlr.set_match_limit_recursion(10); 477 RE re4("(\\w+)*b", options_mlr); 478 CHECK(re4.PartialMatch(text_good) == false); 479 CHECK(re4.PartialMatch(text_bad) == false); 480 CHECK(re4.FullMatch(text_good) == false); 481 CHECK(re4.FullMatch(text_bad) == false); 482} 483 484// A meta-quoted string, interpreted as a pattern, should always match 485// the original unquoted string. 486static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { 487 string quoted = RE::QuoteMeta(unquoted); 488 RE re(quoted, options); 489 CHECK(re.FullMatch(unquoted)); 490} 491 492// A string containing meaningful regexp characters, which is then meta- 493// quoted, should not generally match a string the unquoted string does. 494static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 495 RE_Options options = RE_Options()) { 496 string quoted = RE::QuoteMeta(unquoted); 497 RE re(quoted, options); 498 CHECK(!re.FullMatch(should_not_match)); 499} 500 501// Tests that quoted meta characters match their original strings, 502// and that a few things that shouldn't match indeed do not. 503static void TestQuotaMetaSimple() { 504 TestQuoteMeta("foo"); 505 TestQuoteMeta("foo.bar"); 506 TestQuoteMeta("foo\\.bar"); 507 TestQuoteMeta("[1-9]"); 508 TestQuoteMeta("1.5-2.0?"); 509 TestQuoteMeta("\\d"); 510 TestQuoteMeta("Who doesn't like ice cream?"); 511 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 512 TestQuoteMeta("((?!)xxx).*yyy"); 513 TestQuoteMeta("(["); 514 TestQuoteMeta(string("foo\0bar", 7)); 515} 516 517static void TestQuoteMetaSimpleNegative() { 518 NegativeTestQuoteMeta("foo", "bar"); 519 NegativeTestQuoteMeta("...", "bar"); 520 NegativeTestQuoteMeta("\\.", "."); 521 NegativeTestQuoteMeta("\\.", ".."); 522 NegativeTestQuoteMeta("(a)", "a"); 523 NegativeTestQuoteMeta("(a|b)", "a"); 524 NegativeTestQuoteMeta("(a|b)", "(a)"); 525 NegativeTestQuoteMeta("(a|b)", "a|b"); 526 NegativeTestQuoteMeta("[0-9]", "0"); 527 NegativeTestQuoteMeta("[0-9]", "0-9"); 528 NegativeTestQuoteMeta("[0-9]", "[9]"); 529 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 530} 531 532static void TestQuoteMetaLatin1() { 533 TestQuoteMeta("3\xb2 = 9"); 534} 535 536static void TestQuoteMetaUtf8() { 537#ifdef SUPPORT_UTF8 538 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); 539 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 540 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) 541 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character 542 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) 543 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) 544 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work 545 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) 546 "27\\\xc2\\\xb0", 547 pcrecpp::UTF8()); 548#endif 549} 550 551static void TestQuoteMetaAll() { 552 printf("Testing QuoteMeta\n"); 553 TestQuotaMetaSimple(); 554 TestQuoteMetaSimpleNegative(); 555 TestQuoteMetaLatin1(); 556 TestQuoteMetaUtf8(); 557} 558 559// 560// Options tests contributed by 561// Giuseppe Maxia, CTO, Stardata s.r.l. 562// July 2005 563// 564static void GetOneOptionResult( 565 const char *option_name, 566 const char *regex, 567 const char *str, 568 RE_Options options, 569 bool full, 570 string expected) { 571 572 printf("Testing Option <%s>\n", option_name); 573 if(VERBOSE_TEST) 574 printf("/%s/ finds \"%s\" within \"%s\" \n", 575 regex, 576 expected.c_str(), 577 str); 578 string captured(""); 579 if (full) 580 RE(regex,options).FullMatch(str, &captured); 581 else 582 RE(regex,options).PartialMatch(str, &captured); 583 CHECK_EQ(captured, expected); 584} 585 586static void TestOneOption( 587 const char *option_name, 588 const char *regex, 589 const char *str, 590 RE_Options options, 591 bool full, 592 bool assertive = true) { 593 594 printf("Testing Option <%s>\n", option_name); 595 if (VERBOSE_TEST) 596 printf("'%s' %s /%s/ \n", 597 str, 598 (assertive? "matches" : "doesn't match"), 599 regex); 600 if (assertive) { 601 if (full) 602 CHECK(RE(regex,options).FullMatch(str)); 603 else 604 CHECK(RE(regex,options).PartialMatch(str)); 605 } else { 606 if (full) 607 CHECK(!RE(regex,options).FullMatch(str)); 608 else 609 CHECK(!RE(regex,options).PartialMatch(str)); 610 } 611} 612 613static void Test_CASELESS() { 614 RE_Options options; 615 RE_Options options2; 616 617 options.set_caseless(true); 618 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); 619 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); 620 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); 621 622 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); 623 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); 624 options.set_caseless(false); 625 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); 626} 627 628static void Test_MULTILINE() { 629 RE_Options options; 630 RE_Options options2; 631 const char *str = "HELLO\n" "cruel\n" "world\n"; 632 633 options.set_multiline(true); 634 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); 635 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); 636 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); 637 options.set_multiline(false); 638 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); 639} 640 641static void Test_DOTALL() { 642 RE_Options options; 643 RE_Options options2; 644 const char *str = "HELLO\n" "cruel\n" "world"; 645 646 options.set_dotall(true); 647 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); 648 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); 649 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); 650 options.set_dotall(false); 651 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); 652} 653 654static void Test_DOLLAR_ENDONLY() { 655 RE_Options options; 656 RE_Options options2; 657 const char *str = "HELLO world\n"; 658 659 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); 660 options.set_dollar_endonly(true); 661 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); 662 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); 663} 664 665static void Test_EXTRA() { 666 RE_Options options; 667 const char *str = "HELLO"; 668 669 options.set_extra(true); 670 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); 671 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); 672 options.set_extra(false); 673 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); 674} 675 676static void Test_EXTENDED() { 677 RE_Options options; 678 RE_Options options2; 679 const char *str = "HELLO world"; 680 681 options.set_extended(true); 682 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); 683 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); 684 TestOneOption("EXTENDED (class)", 685 "^ HE L{2} O " 686 "\\s+ " 687 "\\w+ $ ", 688 str, 689 options, 690 false); 691 692 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); 693 TestOneOption("EXTENDED (function)", 694 "^ HE L{2} O " 695 "\\s+ " 696 "\\w+ $ ", 697 str, 698 pcrecpp::EXTENDED(), 699 false); 700 701 options.set_extended(false); 702 TestOneOption("no EXTENDED", "HELLO world", str, options, false); 703} 704 705static void Test_NO_AUTO_CAPTURE() { 706 RE_Options options; 707 const char *str = "HELLO world"; 708 string captured; 709 710 printf("Testing Option <no NO_AUTO_CAPTURE>\n"); 711 if (VERBOSE_TEST) 712 printf("parentheses capture text\n"); 713 RE re("(world|universe)$", options); 714 CHECK(re.Extract("\\1", str , &captured)); 715 CHECK_EQ(captured, "world"); 716 options.set_no_auto_capture(true); 717 printf("testing Option <NO_AUTO_CAPTURE>\n"); 718 if (VERBOSE_TEST) 719 printf("parentheses do not capture text\n"); 720 re.Extract("\\1",str, &captured ); 721 CHECK_EQ(captured, "world"); 722} 723 724static void Test_UNGREEDY() { 725 RE_Options options; 726 const char *str = "HELLO, 'this' is the 'world'"; 727 728 options.set_ungreedy(true); 729 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); 730 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); 731 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); 732 733 options.set_ungreedy(false); 734 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); 735 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); 736} 737 738static void Test_all_options() { 739 const char *str = "HELLO\n" "cruel\n" "world"; 740 RE_Options options; 741 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); 742 743 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); 744 options.set_all_options(0); 745 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); 746 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); 747 748 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); 749 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", 750 " ^ c r u e l $ ", 751 str, 752 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), 753 false); 754 755 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", 756 " ^ c r u e l $ ", 757 str, 758 RE_Options() 759 .set_multiline(true) 760 .set_extended(true), 761 false); 762 763 options.set_all_options(0); 764 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); 765 766} 767 768static void TestOptions() { 769 printf("Testing Options\n"); 770 Test_CASELESS(); 771 Test_MULTILINE(); 772 Test_DOTALL(); 773 Test_DOLLAR_ENDONLY(); 774 Test_EXTENDED(); 775 Test_NO_AUTO_CAPTURE(); 776 Test_UNGREEDY(); 777 Test_EXTRA(); 778 Test_all_options(); 779} 780 781static void TestConstructors() { 782 printf("Testing constructors\n"); 783 784 RE_Options options; 785 options.set_dotall(true); 786 const char *str = "HELLO\n" "cruel\n" "world"; 787 788 RE orig("HELLO.*world", options); 789 CHECK(orig.FullMatch(str)); 790 791 RE copy1(orig); 792 CHECK(copy1.FullMatch(str)); 793 794 RE copy2("not a match"); 795 CHECK(!copy2.FullMatch(str)); 796 copy2 = copy1; 797 CHECK(copy2.FullMatch(str)); 798 copy2 = orig; 799 CHECK(copy2.FullMatch(str)); 800 801 // Make sure when we assign to ourselves, nothing bad happens 802 orig = orig; 803 copy1 = copy1; 804 copy2 = copy2; 805 CHECK(orig.FullMatch(str)); 806 CHECK(copy1.FullMatch(str)); 807 CHECK(copy2.FullMatch(str)); 808} 809 810int main(int argc, char** argv) { 811 // Treat any flag as --help 812 if (argc > 1 && argv[1][0] == '-') { 813 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" 814 " If 'timingX ###' is specified, run the given timing test\n" 815 " with the given number of iterations, rather than running\n" 816 " the default corectness test.\n", argv[0]); 817 return 0; 818 } 819 820 if (argc > 1) { 821 if ( argc == 2 || atoi(argv[2]) == 0) { 822 printf("timing mode needs a num-iters argument\n"); 823 return 1; 824 } 825 if (!strcmp(argv[1], "timing1")) 826 Timing1(atoi(argv[2])); 827 else if (!strcmp(argv[1], "timing2")) 828 Timing2(atoi(argv[2])); 829 else if (!strcmp(argv[1], "timing3")) 830 Timing3(atoi(argv[2])); 831 else 832 printf("Unknown argument '%s'\n", argv[1]); 833 return 0; 834 } 835 836 printf("PCRE C++ wrapper tests\n"); 837 printf("Testing FullMatch\n"); 838 839 int i; 840 string s; 841 842 /***** FullMatch with no args *****/ 843 844 CHECK(RE("h.*o").FullMatch("hello")); 845 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front 846 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end 847 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op 848 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op 849 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops 850 851 /***** FullMatch with args *****/ 852 853 // Zero-arg 854 CHECK(RE("\\d+").FullMatch("1001")); 855 856 // Single-arg 857 CHECK(RE("(\\d+)").FullMatch("1001", &i)); 858 CHECK_EQ(i, 1001); 859 CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); 860 CHECK_EQ(i, -123); 861 CHECK(!RE("()\\d+").FullMatch("10", &i)); 862 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", 863 &i)); 864 865 // Digits surrounding integer-arg 866 CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); 867 CHECK_EQ(i, 23); 868 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); 869 CHECK_EQ(i, 1); 870 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); 871 CHECK_EQ(i, -1); 872 CHECK(RE("(\\d)").PartialMatch("1234", &i)); 873 CHECK_EQ(i, 1); 874 CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); 875 CHECK_EQ(i, -1); 876 877 // String-arg 878 CHECK(RE("h(.*)o").FullMatch("hello", &s)); 879 CHECK_EQ(s, string("ell")); 880 881 // StringPiece-arg 882 StringPiece sp; 883 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); 884 CHECK_EQ(sp.size(), 4); 885 CHECK(memcmp(sp.data(), "ruby", 4) == 0); 886 CHECK_EQ(i, 1234); 887 888 // Multi-arg 889 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); 890 CHECK_EQ(s, string("ruby")); 891 CHECK_EQ(i, 1234); 892 893 // Ignore non-void* NULL arg 894 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); 895 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); 896 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); 897 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); 898#ifdef HAVE_LONG_LONG 899 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); 900#endif 901 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); 902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); 903 904 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 905 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); 906 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); 907 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); 908 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); 909 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); 910 911 // Ignored arg 912 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); 913 CHECK_EQ(s, string("ruby")); 914 CHECK_EQ(i, 1234); 915 916 // Type tests 917 { 918 char c; 919 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 920 CHECK_EQ(c, 'H'); 921 } 922 { 923 unsigned char c; 924 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 925 CHECK_EQ(c, static_cast<unsigned char>('H')); 926 } 927 { 928 short v; 929 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 930 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 931 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 932 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); 933 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); 934 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); 935 } 936 { 937 unsigned short v; 938 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 939 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 940 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); 941 CHECK(!RE("(\\d+)").FullMatch("65536", &v)); 942 } 943 { 944 int v; 945 static const int max_value = 0x7fffffff; 946 static const int min_value = -max_value - 1; 947 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 948 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 949 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); 950 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); 951 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); 952 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); 953 } 954 { 955 unsigned int v; 956 static const unsigned int max_value = 0xfffffffful; 957 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 958 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); 959 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); 960 } 961#ifdef HAVE_LONG_LONG 962# if defined(__MINGW__) || defined(__MINGW32__) 963# define LLD "%I64d" 964# define LLU "%I64u" 965# else 966# define LLD "%lld" 967# define LLU "%llu" 968# endif 969 { 970 long long v; 971 static const long long max_value = 0x7fffffffffffffffLL; 972 static const long long min_value = -max_value - 1; 973 char buf[32]; // definitely big enough for a long long 974 975 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 976 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); 977 978 sprintf(buf, LLD, max_value); 979 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 980 981 sprintf(buf, LLD, min_value); 982 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); 983 984 sprintf(buf, LLD, max_value); 985 assert(buf[strlen(buf)-1] != '9'); 986 buf[strlen(buf)-1]++; 987 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 988 989 sprintf(buf, LLD, min_value); 990 assert(buf[strlen(buf)-1] != '9'); 991 buf[strlen(buf)-1]++; 992 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 993 } 994#endif 995#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG 996 { 997 unsigned long long v; 998 long long v2; 999 static const unsigned long long max_value = 0xffffffffffffffffULL; 1000 char buf[32]; // definitely big enough for a unsigned long long 1001 1002 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); 1003 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); 1004 1005 sprintf(buf, LLU, max_value); 1006 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 1007 1008 assert(buf[strlen(buf)-1] != '9'); 1009 buf[strlen(buf)-1]++; 1010 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 1011 } 1012#endif 1013 { 1014 float v; 1015 CHECK(RE("(.*)").FullMatch("100", &v)); 1016 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1017 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1018 } 1019 { 1020 double v; 1021 CHECK(RE("(.*)").FullMatch("100", &v)); 1022 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1023 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1024 } 1025 1026 // Check that matching is fully anchored 1027 CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); 1028 CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); 1029 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); 1030 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); 1031 1032 // Braces 1033 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); 1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); 1035 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); 1036 1037 // Complicated RE 1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); 1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); 1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); 1041 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); 1042 1043 // Check full-match handling (needs '$' tacked on internally) 1044 CHECK(RE("fo|foo").FullMatch("fo")); 1045 CHECK(RE("fo|foo").FullMatch("foo")); 1046 CHECK(RE("fo|foo$").FullMatch("fo")); 1047 CHECK(RE("fo|foo$").FullMatch("foo")); 1048 CHECK(RE("foo$").FullMatch("foo")); 1049 CHECK(!RE("foo\\$").FullMatch("foo$bar")); 1050 CHECK(!RE("fo|bar").FullMatch("fox")); 1051 1052 // Uncomment the following if we change the handling of '$' to 1053 // prevent it from matching a trailing newline 1054 if (false) { 1055 // Check that we don't get bitten by pcre's special handling of a 1056 // '\n' at the end of the string matching '$' 1057 CHECK(!RE("foo$").PartialMatch("foo\n")); 1058 } 1059 1060 // Number of args 1061 int a[16]; 1062 CHECK(RE("").FullMatch("")); 1063 1064 memset(a, 0, sizeof(0)); 1065 CHECK(RE("(\\d){1}").FullMatch("1", 1066 &a[0])); 1067 CHECK_EQ(a[0], 1); 1068 1069 memset(a, 0, sizeof(0)); 1070 CHECK(RE("(\\d)(\\d)").FullMatch("12", 1071 &a[0], &a[1])); 1072 CHECK_EQ(a[0], 1); 1073 CHECK_EQ(a[1], 2); 1074 1075 memset(a, 0, sizeof(0)); 1076 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", 1077 &a[0], &a[1], &a[2])); 1078 CHECK_EQ(a[0], 1); 1079 CHECK_EQ(a[1], 2); 1080 CHECK_EQ(a[2], 3); 1081 1082 memset(a, 0, sizeof(0)); 1083 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", 1084 &a[0], &a[1], &a[2], &a[3])); 1085 CHECK_EQ(a[0], 1); 1086 CHECK_EQ(a[1], 2); 1087 CHECK_EQ(a[2], 3); 1088 CHECK_EQ(a[3], 4); 1089 1090 memset(a, 0, sizeof(0)); 1091 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", 1092 &a[0], &a[1], &a[2], 1093 &a[3], &a[4])); 1094 CHECK_EQ(a[0], 1); 1095 CHECK_EQ(a[1], 2); 1096 CHECK_EQ(a[2], 3); 1097 CHECK_EQ(a[3], 4); 1098 CHECK_EQ(a[4], 5); 1099 1100 memset(a, 0, sizeof(0)); 1101 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", 1102 &a[0], &a[1], &a[2], 1103 &a[3], &a[4], &a[5])); 1104 CHECK_EQ(a[0], 1); 1105 CHECK_EQ(a[1], 2); 1106 CHECK_EQ(a[2], 3); 1107 CHECK_EQ(a[3], 4); 1108 CHECK_EQ(a[4], 5); 1109 CHECK_EQ(a[5], 6); 1110 1111 memset(a, 0, sizeof(0)); 1112 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", 1113 &a[0], &a[1], &a[2], &a[3], 1114 &a[4], &a[5], &a[6])); 1115 CHECK_EQ(a[0], 1); 1116 CHECK_EQ(a[1], 2); 1117 CHECK_EQ(a[2], 3); 1118 CHECK_EQ(a[3], 4); 1119 CHECK_EQ(a[4], 5); 1120 CHECK_EQ(a[5], 6); 1121 CHECK_EQ(a[6], 7); 1122 1123 memset(a, 0, sizeof(0)); 1124 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" 1125 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( 1126 "1234567890123456", 1127 &a[0], &a[1], &a[2], &a[3], 1128 &a[4], &a[5], &a[6], &a[7], 1129 &a[8], &a[9], &a[10], &a[11], 1130 &a[12], &a[13], &a[14], &a[15])); 1131 CHECK_EQ(a[0], 1); 1132 CHECK_EQ(a[1], 2); 1133 CHECK_EQ(a[2], 3); 1134 CHECK_EQ(a[3], 4); 1135 CHECK_EQ(a[4], 5); 1136 CHECK_EQ(a[5], 6); 1137 CHECK_EQ(a[6], 7); 1138 CHECK_EQ(a[7], 8); 1139 CHECK_EQ(a[8], 9); 1140 CHECK_EQ(a[9], 0); 1141 CHECK_EQ(a[10], 1); 1142 CHECK_EQ(a[11], 2); 1143 CHECK_EQ(a[12], 3); 1144 CHECK_EQ(a[13], 4); 1145 CHECK_EQ(a[14], 5); 1146 CHECK_EQ(a[15], 6); 1147 1148 /***** PartialMatch *****/ 1149 1150 printf("Testing PartialMatch\n"); 1151 1152 CHECK(RE("h.*o").PartialMatch("hello")); 1153 CHECK(RE("h.*o").PartialMatch("othello")); 1154 CHECK(RE("h.*o").PartialMatch("hello!")); 1155 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); 1156 1157 /***** other tests *****/ 1158 1159 RadixTests(); 1160 TestReplace(); 1161 TestExtract(); 1162 TestConsume(); 1163 TestFindAndConsume(); 1164 TestQuoteMetaAll(); 1165 TestMatchNumberPeculiarity(); 1166 1167 // Check the pattern() accessor 1168 { 1169 const string kPattern = "http://([^/]+)/.*"; 1170 const RE re(kPattern); 1171 CHECK_EQ(kPattern, re.pattern()); 1172 } 1173 1174 // Check RE error field. 1175 { 1176 RE re("foo"); 1177 CHECK(re.error().empty()); // Must have no error 1178 } 1179 1180#ifdef SUPPORT_UTF8 1181 // Check UTF-8 handling 1182 { 1183 printf("Testing UTF-8 handling\n"); 1184 1185 // Three Japanese characters (nihongo) 1186 const unsigned char utf8_string[] = { 1187 0xe6, 0x97, 0xa5, // 65e5 1188 0xe6, 0x9c, 0xac, // 627c 1189 0xe8, 0xaa, 0x9e, // 8a9e 1190 0 1191 }; 1192 const unsigned char utf8_pattern[] = { 1193 '.', 1194 0xe6, 0x9c, 0xac, // 627c 1195 '.', 1196 0 1197 }; 1198 1199 // Both should match in either mode, bytes or UTF-8 1200 RE re_test1("........."); 1201 CHECK(re_test1.FullMatch(utf8_string)); 1202 RE re_test2("...", pcrecpp::UTF8()); 1203 CHECK(re_test2.FullMatch(utf8_string)); 1204 1205 // Check that '.' matches one byte or UTF-8 character 1206 // according to the mode. 1207 string ss; 1208 RE re_test3("(.)"); 1209 CHECK(re_test3.PartialMatch(utf8_string, &ss)); 1210 CHECK_EQ(ss, string("\xe6")); 1211 RE re_test4("(.)", pcrecpp::UTF8()); 1212 CHECK(re_test4.PartialMatch(utf8_string, &ss)); 1213 CHECK_EQ(ss, string("\xe6\x97\xa5")); 1214 1215 // Check that string matches itself in either mode 1216 RE re_test5(utf8_string); 1217 CHECK(re_test5.FullMatch(utf8_string)); 1218 RE re_test6(utf8_string, pcrecpp::UTF8()); 1219 CHECK(re_test6.FullMatch(utf8_string)); 1220 1221 // Check that pattern matches string only in UTF8 mode 1222 RE re_test7(utf8_pattern); 1223 CHECK(!re_test7.FullMatch(utf8_string)); 1224 RE re_test8(utf8_pattern, pcrecpp::UTF8()); 1225 CHECK(re_test8.FullMatch(utf8_string)); 1226 } 1227 1228 // Check that ungreedy, UTF8 regular expressions don't match when they 1229 // oughtn't -- see bug 82246. 1230 { 1231 // This code always worked. 1232 const char* pattern = "\\w+X"; 1233 const string target = "a aX"; 1234 RE match_sentence(pattern); 1235 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1236 1237 CHECK(!match_sentence.FullMatch(target)); 1238 CHECK(!match_sentence_re.FullMatch(target)); 1239 } 1240 1241 { 1242 const char* pattern = "(?U)\\w+X"; 1243 const string target = "a aX"; 1244 RE match_sentence(pattern); 1245 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1246 1247 CHECK(!match_sentence.FullMatch(target)); 1248 CHECK(!match_sentence_re.FullMatch(target)); 1249 } 1250#endif /* def SUPPORT_UTF8 */ 1251 1252 printf("Testing error reporting\n"); 1253 1254 { RE re("a\\1"); CHECK(!re.error().empty()); } 1255 { 1256 RE re("a[x"); 1257 CHECK(!re.error().empty()); 1258 } 1259 { 1260 RE re("a[z-a]"); 1261 CHECK(!re.error().empty()); 1262 } 1263 { 1264 RE re("a[[:foobar:]]"); 1265 CHECK(!re.error().empty()); 1266 } 1267 { 1268 RE re("a(b"); 1269 CHECK(!re.error().empty()); 1270 } 1271 { 1272 RE re("a\\"); 1273 CHECK(!re.error().empty()); 1274 } 1275 1276 // Test that recursion is stopped 1277 TestRecursion(); 1278 1279 // Test Options 1280 if (getenv("VERBOSE_TEST") != NULL) 1281 VERBOSE_TEST = true; 1282 TestOptions(); 1283 1284 // Test the constructors 1285 TestConstructors(); 1286 1287 // Done 1288 printf("OK\n"); 1289 1290 return 0; 1291} 1292