16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/******************************************************************** 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * COPYRIGHT: 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (c) 2002-2013, International Business Machines Corporation and 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ********************************************************************/ 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// regextst.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ICU Regular Expressions test, part of intltest. 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NOTE!! 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PLEASE be careful about ASCII assumptions in this test. 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This test is one of the worst repeat offenders. 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org If you have questions, contact someone on the ICU PMC 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org who has access to an EBCDIC system. 
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "intltest.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/regex.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regextst.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "util.h" 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdlib.h> 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <string.h> 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h> 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uinvchar.h" 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SUPPORT_MUTATING_INPUT_STRING 0 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Test class boilerplate 
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::RegexTest() 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::~RegexTest() 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) logln("TestSuite RegexTest: "); 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (index) { 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0: name = "Basic"; 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Basic(); 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: name = "API_Match"; 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) API_Match(); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: name = "API_Replace"; 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) 
API_Replace(); 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: name = "API_Pattern"; 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) API_Pattern(); 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 4: 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_FILE_IO 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org name = "Extended"; 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Extended(); 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org name = "skip"; 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 5: name = "Errors"; 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Errors(); 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 6: name = "PerlTests"; 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) PerlTests(); 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 7: name = "Callbacks"; 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Callbacks(); 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 8: name = "FindProgressCallbacks"; 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) FindProgressCallbacks(); 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 9: name = "Bug 6149"; 
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug6149(); 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 10: name = "UTextBasic"; 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) UTextBasic(); 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 11: name = "API_Match_UTF8"; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) API_Match_UTF8(); 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 12: name = "API_Replace_UTF8"; 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) API_Replace_UTF8(); 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 13: name = "API_Pattern_UTF8"; 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) API_Pattern_UTF8(); 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 14: name = "PerlTestsUTF8"; 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) PerlTestsUTF8(); 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 15: name = "PreAllocatedUTextCAPI"; 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) PreAllocatedUTextCAPI(); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 16: name = "Bug 7651"; 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug7651(); 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 17: name = "Bug 7740"; 
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug7740(); 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 18: name = "Bug 8479"; 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug8479(); 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 19: name = "Bug 7029"; 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug7029(); 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 20: name = "CheckInvBufSize"; 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) CheckInvBufSize(); 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 21: name = "Bug 9283"; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (exec) Bug9283(); 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: name = ""; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; //needed to end loop 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * into ASCII. 
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see utext_openUTF8 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Error Checking / Reporting macros used in all of the tests. 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) { 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t oldIndex = utext_getNativeIndex(text); 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(text, 0); 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *bufPtr = buf; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = utext_next32From(text, 0); 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) { 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (0x000020<=c && c<0x00007e) { 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bufPtr = c; 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sprintf(bufPtr,"U+%04X", c); 
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufPtr+= strlen(bufPtr)-1; 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bufPtr = '%'; 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufPtr++; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(text); 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bufPtr = 0; 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *ebuf = (char*)malloc(bufLen); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen); 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strncpy(buf, ebuf, bufLen); 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org free((void*)ebuf); 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(text, oldIndex); 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic char ASSERT_BUF[1024]; 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char* RegexTest::extractToAssertBuf(const UnicodeString& message) { 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(message.length()==0) { 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strcpy(ASSERT_BUF, "[[empty UnicodeString]]"); 
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString buf; 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IntlTest::prettify(message,buf); 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf.length()==0) { 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]"); 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1); 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ASSERT_BUF[0]==0) { 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ASSERT_BUF[0]=0; 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(int32_t i=0;i<buf.length();i++) { 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar ch = buf[i]; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ASSERT_BUF; 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);} 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \ 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __FILE__, __LINE__, u_errorName(status)); return;}} 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\ 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgif (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=%s, got %s", \ 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __LINE__, u_errorName(errcode), u_errorName(status));};} 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \ 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org "RegexTest failure at line %d, from %d. 
status=%d\n",__LINE__, (line), status); }} 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \ 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}} 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};} 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool testUTextEqual(UText *uta, UText *utb) { 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 ca = 0; 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cb = 0; 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(uta, 0); 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(utb, 0); 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ca = utext_next32(uta); 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cb = utext_next32(utb); 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ca != cb) { 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while (ca != U_SENTINEL); 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ca == cb; 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param expected expected text in UTF-8 (not platform) codepage 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText expectedText = UTEXT_INITIALIZER; 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&expectedText, expected, -1, &status); 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(status)) { 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) { 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: assertUText: expected is %d utf-8 bytes, but utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected)); 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(actual, 0); 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!testUTextEqual(&expectedText, actual)) { 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char buf[201 /*21*/]; 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
char expectedBuf[201]; 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&expectedText); 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param expected invariant (platform local text) input 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) { 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText expectedText = UTEXT_INITIALIZER; 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status); 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(status)) { 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(actual, 0); 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!testUTextEqual(&expectedText, actual)) { 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char buf[201 /*21*/]; 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char expectedBuf[201]; 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&expectedText); 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Assumes utf-8 input 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__) 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Assumes Invariant input 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__) 
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This buffer ( inv_buf ) is used to hold the UTF-8 strings 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * passed into utext_openUTF8. An error will be given if 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * INV_BUFSIZ is too small. It's only used on EBCDIC systems. 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define INV_BUFSIZ 2048 /* increase this if too small */ 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int64_t inv_next=0; 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_CHARSET_FAMILY!=U_ASCII_FAMILY 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic char inv_buf[INV_BUFSIZ]; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) { 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length==-1) length=strlen(inv); 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_CHARSET_FAMILY==U_ASCII_FAMILY 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inv_next+=length; 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_openUTF8(ut, inv, length, status); 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inv_next+length+1>INV_BUFSIZ) { 
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n", 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1)); 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_MEMORY_ALLOCATION_ERROR; 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char *buf = (unsigned char*)inv_buf+inv_next; 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_aestrncpy(buf, (const uint8_t*)inv, length); 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inv_next+=length; 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next); 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_openUTF8(ut, (const char*)buf, length, status); 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_TESTLM Macro + invocation function to simplify writing quick tests 
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// for the LookingAt() and Match() functions. 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// usage: 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_TESTLM("pattern", "input text", lookingAt expected, matches expected); 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The expected results are UBool - TRUE or FALSE. 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The input text is unescaped. The pattern is not. 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_TESTLM(pat, text, looking, match) {doRegexLMTest(pat, text, looking, match, __LINE__);doRegexLMTestUTF8(pat, text, looking, match, __LINE__);} 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString pattern(pat, -1, US_INV); 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString inputText(text, -1, US_INV); 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *REPattern = NULL; 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher 
*REMatcher = NULL; 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool retVal = TRUE; 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString patString(pat, -1, US_INV); 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REPattern = RegexPattern::compile(patString, 0, pe, status); 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Status = %s", 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (line==376) { RegexPatternDump(REPattern);} 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString inputString(inputText); 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString unEscapedInput = inputString.unescape(); 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REMatcher = REPattern->matcher(unEscapedInput, status); 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in REPattern::matcher() at line %d. 
Status = %s\n", 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool actualmatch; 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actualmatch = REMatcher->lookingAt(status); 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in lookingAt() at line %d. Status = %s\n", 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualmatch != looking) { 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest: wrong return from lookingAt() at line %d.\n", line); 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actualmatch = REMatcher->matches(status); 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in matches() at line %d. 
Status = %s\n", 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualmatch != match) { 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest: wrong return from matches() at line %d.\n", line); 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (retVal == FALSE) { 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPatternDump(REPattern); 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REPattern; 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REMatcher; 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retVal; 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText pattern = UTEXT_INITIALIZER; 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t inputUTF8Length; 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *textChars = NULL; 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText inputText = UTEXT_INITIALIZER; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status 
= U_ZERO_ERROR; 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *REPattern = NULL; 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *REMatcher = NULL; 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool retVal = TRUE; 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REPattern = RegexPattern::compile(&pattern, 0, pe, status); 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8). Status = %s\n", 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString inputString(text, -1, US_INV); 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString unEscapedInput = inputString.unescape(); 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status); 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status) && 
status != U_BUFFER_OVERFLOW_ERROR) { 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UTF-8 does not allow unpaired surrogates, so this could actually happen 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("RegexTest unable to convert input to UTF8 at line %d. Status = %s\n", line, u_errorName(status)); 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; // not a failure of the Regex engine 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; // buffer overflow 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org textChars = new char[inputUTF8Length+1]; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status); 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REMatcher = &REPattern->matcher(status)->reset(&inputText); 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). 
Status = %s\n", 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool actualmatch; 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actualmatch = REMatcher->lookingAt(status); 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in lookingAt() at line %d (UTF8). Status = %s\n", 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualmatch != looking) { 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line); 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actualmatch = REMatcher->matches(status); 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure in matches() at line %d (UTF8). 
Status = %s\n", 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, u_errorName(status)); 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualmatch != match) { 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line); 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = FALSE; 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (retVal == FALSE) { 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPatternDump(REPattern); 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REPattern; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REMatcher; 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&inputText); 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&pattern); 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] textChars; 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retVal; 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_ERR Macro + invocation 
function to simplify writing tests 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// regex tests for incorrect patterns 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// usage: 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_ERR("pattern", expected error line, column, expected status); 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__); 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol, 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode expectedStatus, int32_t line) { 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern(pat); 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *callerPattern = NULL; 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile the caller's pattern 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString patString(pat); 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callerPattern = RegexPattern::compile(patString, 0, pe, status); 
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != expectedStatus) { 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != U_ZERO_ERROR) { 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pe.line != errLine || pe.offset != errCol) { 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: incorrect line/offset from UParseError. Expected %d/%d; got %d/%d.\n", 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, errLine, errCol, pe.line, pe.offset); 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete callerPattern; 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile again, using a UTF-8-based UText 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText patternText = UTEXT_INITIALIZER; 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&patternText, pat, -1, &status); 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callerPattern = RegexPattern::compile(&patternText, 0, pe, status); 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != expectedStatus) { 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != U_ZERO_ERROR) { 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pe.line != errLine || pe.offset != errCol) { 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: incorrect line/offset from UParseError. Expected %d/%d; got %d/%d.\n", 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, errLine, errCol, pe.line, pe.offset); 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete callerPattern; 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&patternText); 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Basic Check for basic functionality of regex pattern matching. 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Avoid the use of REGEX_FIND test macro, which has 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// substantial dependencies on basic Regex functionality. 
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Basic() { 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Debug - slide failing test cases early 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pattern; 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status); 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPatternDump(pattern); 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status); 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool result = m->find(); 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("result = %d\n", result); 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", 
"=XX===================="); 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(1); 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pattern with parentheses 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE, FALSE); 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)ring", "stabcring", TRUE, TRUE); 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)ring", "stabcrung", FALSE, FALSE); 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Patterns with * 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE); 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE); 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE); 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE); 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE); 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a*", "", TRUE, TRUE); 
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a*", "b", TRUE, FALSE); 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Patterns with "." 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".", "abc", TRUE, FALSE); 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("...", "abc", TRUE, TRUE); 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("....", "abc", FALSE, FALSE); 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE); 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE); 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE); 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE); 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE); 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Patterns with * applied to chars at end of literal string 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("abc*", "ab", TRUE, TRUE); 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE); 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Supplemental chars match as single chars, not a pair of surrogates. 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE); 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE); 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE); 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UnicodeSets in the pattern 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[1-6]", "1", TRUE, TRUE); 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[1-6]", "3", TRUE, TRUE); 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[1-6]", "7", FALSE, FALSE); 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE); 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE); 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE); 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE); 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE); 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE); 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, 
FALSE); // note that * matches 0 occurences. 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("[a][b][[:Zs:]]*", "ab ", TRUE, TRUE); 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // OR operator in patterns 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|b)", "a", TRUE, TRUE); 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|b)", "b", TRUE, TRUE); 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|b)", "c", FALSE, FALSE); 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a|b", "b", TRUE, TRUE); 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE); 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE); 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE); 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE); 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE); 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE); 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // + 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab+", "abbc", TRUE, FALSE); 
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab+c", "ac", FALSE, FALSE); 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("b+", "", FALSE, FALSE); 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE); 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE); 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE); 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ? 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab?", "ab", TRUE, TRUE); 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab?", "a", TRUE, TRUE); 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab?", "ac", TRUE, FALSE); 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("ab?", "abb", TRUE, FALSE); 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE); 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE); 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE); 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE); 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE); 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Escape sequences that become single literal chars, handled internally 
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // by ICU's Unescape. 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE); // Octal TODO: not implemented yet. 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE); // BEL 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE); // Control-L 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE); // Escape 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE); // Form Feed 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE); // new line 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE); // CR 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE); // Tab 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE); 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE); 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE); // \A matches only at the beginning of input 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE); // \A matches only at the beginning of input 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Escape of special chars in patterns 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE); 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// UTextBasic Check for quirks that are specific to the UText 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// implementation. 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::UTextBasic() { 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText pattern = UTEXT_INITIALIZER; 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_abc, -1, &status); 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher(&pattern, 0, status); 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = UTEXT_INITIALIZER; 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_abc, -1, &status); 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(&input); 
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(matcher.inputText()); 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&pattern); 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input); 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Match Test that the API for class RegexMatcher 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// is present and nominally working, but excluding functions 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// implementing replace operations. 
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Match() { 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags = 0; 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Debug - slide failing test cases early 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Simple pattern compilation 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re("abc"); 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2; 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat2 = RegexPattern::compile(re, flags, pe, status); 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
UnicodeString inStr1 = "abcdef this is a test"; 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString instr2 = "not abc"; 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString empty = ""; 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matcher creation and reset. 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m1 = pat2->matcher(inStr1, status); 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->input() == inStr1); 7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(instr2); 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == FALSE); 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->input() == instr2); 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(inStr1); 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->input() == inStr1); 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(empty); 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == FALSE); 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->input() == empty); 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m1->pattern() == pat2); 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // reset(pos, status) 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(inStr1); 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(4, status); 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->input() == inStr1); 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(-1, status); 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(0, status); 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len = m1->input().length(); 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(len-1, status); 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(len, status); 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(len+1, status); 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match(pos, status) 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(instr2); 8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(4, status) == TRUE); 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(); 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(3, status) == FALSE); 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(); 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(5, status) == FALSE); 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(4, status) == TRUE); 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(-1, status) == FALSE); 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match() at end of string should fail, but should not 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // be an error. 
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len = m1->input().length(); 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(len, status) == FALSE); 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match beyond end of string should fail with an error. 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(len+1, status) == FALSE); 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Successful match at end of string. 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m("A?", 0, status); // will match zero length string. 
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(inStr1); 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len = inStr1.length(); 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(len, status) == TRUE); 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(empty); 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(0, status) == TRUE); 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // lookingAt(pos, status) 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(instr2); // "not abc" 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status 
= U_ZERO_ERROR; 8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len = m1->input().length(); 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(len, status) == FALSE); 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE); 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m1; 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Capture Group. 
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::start(); 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::end(); 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::groupCount(); 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re("01(23(45)67)(.*)"); 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString data = "0123456789"; 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = pat->matcher(data, status); 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t matchStarts[] = {0, 2, 4, 8}; 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t matchEnds[] = {10, 8, 6, 10}; 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<4; i++) { 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t actualStart = matcher->start(i, status); 
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualStart != matchStarts[i]) { 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure at line %d, index %d. Expected %d, got %d\n", 9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __LINE__, i, matchStarts[i], actualStart); 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t actualEnd = matcher->end(i, status); 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualEnd != matchEnds[i]) { 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure at line %d index %d. Expected %d, got %d\n", 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __LINE__, i, matchEnds[i], actualEnd); 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); 
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->lookingAt(status); 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->group(status) == "0123456789"); 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->group(0, status) == "0123456789"); 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->group(1, status) == "234567" ); 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->group(2, status) == "45" ); 9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->group(3, status) == "89" ); 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
int32_t flags=0; 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re("abc"); 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString data = ".abc..abc...abc.."; 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = pat->matcher(data, status); 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 6); 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 12); 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find() == FALSE); 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find() == FALSE); 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(matcher->find()); 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(0, status)); 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(1, status)); 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(2, status)); 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 6); 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(12, status)); 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 12); 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(13, status) == FALSE); 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(16, status) == FALSE); 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(17, status) == FALSE); 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->groupCount() == 0); 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find, with \G in pattern (true if at the end of a previous match). 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV); 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString data = ".abcabc.abc.."; 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = pat->matcher(data, status); 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 0); 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(1, status) == -1); 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(2, status) == 1); 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 4); 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(1, status) == 4); 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(2, status) == -1); 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find with zero length matches, match position should bump ahead 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to prevent loops. 
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // using an always-true look-ahead. 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s(" "); 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(s); 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i++) { 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == i); 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==5); 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check that the bump goes over surrogate pairs OK 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004"); 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = s.unescape(); 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(s); 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i+=2) { 
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == i); 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==10); 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find() loop breaking test. 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // with pattern of /.?/, should see a series of one char matches, then a single 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match of zero length at the end of the input string. 
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".?", 0, status); 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s(" "); 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(s); 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i++) { 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==5); 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matchers with no input string behave as if they had an empty input string. 
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".?", 0, status); 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.find()); 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == 0); 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.input() == ""); 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *p = RegexPattern::compile(".", 0, status); 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m = p->matcher(status); 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->find() == FALSE); 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->input() == ""); 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m; 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete p; 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regions 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString("This is test data"); 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".*", testString, 0, status); 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == testString.length()); 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.region(2,4, status); 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(status)); 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status)==2); 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status)==4); 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(); 11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == testString.length()); 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString shorterString("short"); 
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(shorterString); 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == shorterString.length()); 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // hitEnd() and requireEnd() 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString("aabb"); 11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m1(".*", testString, 0, status); 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.lookingAt(status) == TRUE); 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.hitEnd() == TRUE); 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.requireEnd() == FALSE); 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m2("a*", testString, 0, status); 11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(m2.lookingAt(status) == TRUE); 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m2.hitEnd() == FALSE); 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m2.requireEnd() == FALSE); 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m3(".*$", testString, 0, status); 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.lookingAt(status) == TRUE); 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.hitEnd() == TRUE); 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.requireEnd() == TRUE); 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compilation error on reset with UChar * 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // These were a hazard that people were stumbling over with runtime errors. 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Changed them to compiler errors by adding private methods that more closely 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matched the incorrect use of the functions. 
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar ucharString[20]; 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".", 0, status); 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(ucharString); // should not compile. 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *p = RegexPattern::compile(".", 0, status); 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m2 = p->matcher(ucharString, status); // should not compile. 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m3(".", ucharString, 0, status); // Should not compile 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Time Outs. 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: These tests will need to be changed when the regexp engine is 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // able to detect and cut short the exponential time behavior on 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // this type of match. 
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Enough 'a's in the string to cause the match to time out. 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Each on additonal 'a' doubles the time) 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("(a+)+b", testString, 0, status); 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getTimeLimit() == 0); 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setTimeLimit(100, status); 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getTimeLimit() == 100); 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_REGEX_TIME_OUT); 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Few enough 'a's to slip in under the time limit. 
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString("aaaaaaaaaaaaaaaaaa"); 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("(a+)+b", testString, 0, status); 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setTimeLimit(100, status); 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Stack Limits 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A' 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of the '+', and makes the stack frames larger. 
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("(A)+A$", testString, 0, status); 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // With the default stack, this match should fail to run 12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // With unlimited stack, it should run 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setStackLimit(0, status); 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.lookingAt(status) == TRUE); 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getStackLimit() == 0); 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // With a limited stack, it the match should fail 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setStackLimit(10000, status); 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getStackLimit() == 10000); 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A pattern that doesn't save state should work with 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a minimal sized stack 12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString = "abc"; 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("abc", testString, 0, status); 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setStackLimit(30, status); 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.matches(status) == TRUE); 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getStackLimit() == 30); 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Negative stack sizes should fail 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setStackLimit(1000, status); 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setStackLimit(-1, status); 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.getStackLimit() == 1000); 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
} 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Replace API test for class RegexMatcher, testing the 13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Replace family of functions. 13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Replace() { 13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replace 13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re("abc"); 13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(re, flags, pe, 
status); 13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString data = ".abc..abc...abc.."; 13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = pat->matcher(data, status); 13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plain vanilla matches. 13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString dest; 13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceFirst("yz", status); 13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ".yz..abc...abc.."); 13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceAll("yz", status); 13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ".yz..yz...yz.."); 13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plain vanilla non-matches. 
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString d2 = ".abx..abx...abx.."; 13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(d2); 13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceFirst("yz", status); 13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ".abx..abx...abx.."); 13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceAll("yz", status); 13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ".abx..abx...abx.."); 13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Empty source string 13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString d3 = ""; 13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(d3); 13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceFirst("yz", status); 13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ""); 13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceAll("yz", status); 13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == ""); 
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Empty substitution string 13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(data); // ".abc..abc...abc.." 13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceFirst("", status); 13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "...abc...abc.."); 13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceAll("", status); 13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "........"); 13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match whole string 13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString d4 = "abc"; 13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(d4); 13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceFirst("xyz", status); 13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "xyz"); 13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher->replaceAll("xyz", status); 13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "xyz"); 13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Capture Group, simple case 13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re2("a(..)"); 13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status); 13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString d5 = "abcdefg"; 14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher2 = pat2->matcher(d5, status); 14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher2->replaceFirst("$1$1", status); 14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "bcbcdefg"); 14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status); 14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "The value of $1 is bc.defg"); 14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher2->replaceFirst("$ by itself, no group number $$$", status); 14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "$ by itself, no group number $$$defg"); 14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF."); 14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org replacement = replacement.unescape(); 14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = matcher2->replaceFirst(replacement, status); 14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg"); 14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR); 14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replacement String with \u hex escapes 14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString src = "abc 1 abc 2 abc 3"; 14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--"); 14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(src); 14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString result = matcher->replaceAll(substitute, status); 14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3"); 14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString src = "abc !"; 14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--"); 14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(src); 14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString result = matcher->replaceAll(substitute, status); 14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString expected = UnicodeString("--"); 14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected.append((UChar32)0x10000); 14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected.append("-- !"); 14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == expected); 14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: need more through testing of capture substitutions. 
14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Bug 4057 14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin"; 14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m("ss(.*?)ee", 0, status); 14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString result; 14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Multiple finds do NOT bump up the previous appendReplacement postion. 14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(s); 14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(result, "ooh", status); 14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // After a reset into the interior of a string, appendReplacemnt still starts at beginning. 
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.truncate(0); 14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(10, status); 14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(result, "ooh", status); 14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find() at interior of string, appendReplacemnt still starts at beginning. 14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.truncate(0); 14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(); 14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(10, status); 14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(result, "ooh", status); 14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendTail(result); 14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin"); 14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher2; 14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Pattern Test that the API for class RegexPattern is 15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// present and nominally working. 15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Pattern() { 15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern pata; // Test default constructor to not crash. 
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern patb; 15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pata == patb); 15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pata == pata); 15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re1("abc[a-l][m-z]"); 15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString re2("def"); 15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1 = RegexPattern::compile(re1, 0, pe, status); 15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2 = RegexPattern::compile(re2, 0, pe, status); 15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1 == *pat1); 15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1 != pata); 15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Assign 15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patb = *pat1; 15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == *pat1); 15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy Construct 15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern patc(*pat1); 15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patc == *pat1); 
15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == patc); 15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1 != pat2); 15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patb = *pat2; 15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb != patc); 15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == *pat2); 15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile with no flags. 15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1a = RegexPattern::compile(re1, pe, status); 15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1a == *pat1); 15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1a->flags() == 0); 15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile with different flags should be not equal 15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status); 15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1b != *pat1a); 15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE); 15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1a->flags() == 0); 15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1b; 15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // clone 
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1c = pat1->clone(); 15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1c == *pat1); 15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1c != *pat2); 15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1c; 15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1a; 15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Verify that a matcher created from a cloned pattern works. 15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Jitterbug 3423) 15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pSource = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status); 15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pClone = pSource->clone(); 15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pSource; 15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *mFromClone = pClone->matcher(status); 15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s = "Hello World"; 15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mFromClone->reset(s); 
15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == TRUE); 15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->group(status) == "Hello"); 15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == TRUE); 15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->group(status) == "World"); 15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == FALSE); 15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete mFromClone; 15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pClone; 15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matches convenience API 15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE); 15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); 15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Split() 16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(" +", pe, status); 16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString fields[10]; 16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t n; 16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 10, status); 16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now"); 16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="is"); 16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="the"); 
16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="time"); 16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]==""); 16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 2, status); 16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==2); 16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now"); 16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="is the time"); 16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="the"); // left over from previous test 16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[1] = "*"; 16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 1, status); 16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==1); 16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now is the time"); 16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="*"); 16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" Now is the time ", fields, 10, status); 16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==6); 
16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="Now"); 16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="is"); 16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="the"); 16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="time"); 16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]==""); 16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" ", fields, 10, status); 16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==2); 16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]==""); 16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[0] = "foo"; 16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("", fields, 10, status); 16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==0); 16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="foo"); 16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // split, with a pattern with (capture) 16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = 
RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"), pe, status); 16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status); 16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==7); 16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="c"); 16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]==""); 16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status==U_ZERO_ERROR); 16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status); 16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==7); 16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 
16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="c"); 16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]==""); 16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[6] = "foo"; 16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 6, status); 16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==6); 16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]==""); // All text following "<c>" field delimiter. 
16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]=="foo"); 16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[5] = "foo"; 17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status); 17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time<c>"); 17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="foo"); 17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[5] = "foo"; 17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time", fields, 5, status); 17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 
17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="foo"); 17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status); 17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="the time<c>"); 17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile("([-,])", pe, status); 17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("1-10,20", fields, 10, status); 17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="1"); 17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="-"); 
17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="10"); 17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]==","); 17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="20"); 17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test split of string with empty trailing fields 17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(",", pe, status); 17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("a,b,c,", fields, 10, status); 17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="a"); 17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="b"); 17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="c"); 17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]==""); 17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("a,,,", fields, 10, status); 17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="a"); 17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]==""); 17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]==""); 
17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]==""); 17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Split Separator with zero length match. 17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(":?", pe, status); 17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("abc", fields, 10, status); 17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="b"); 17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="c"); 17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]==""); 17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexPattern::pattern() 17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = new RegexPattern(); 17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->pattern() == ""); 17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile("(Hello, world)*", pe, status); 17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->pattern() == "(Hello, world)*"); 17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // classID functions 17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile("(Hello, world)*", pe, status); 17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID()); 17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->getDynamicClassID() != NULL); 18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString Hello("Hello, world."); 18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m = pat1->matcher(Hello, status); 18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID()); 18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID()); 18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->getDynamicClassID() != NULL); 18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m; 18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Match_UTF8 Test that the alternate engine for class RegexMatcher 18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// is present and working, but excluding functions 18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// implementing replace operations. 18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Match_UTF8() { 18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags = 0; 18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Debug - slide failing test cases early 18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Simple pattern compilation 18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re = UTEXT_INITIALIZER; 18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&re); 18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2; 18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat2 = RegexPattern::compile(&re, flags, pe, status); 18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input1 = UTEXT_INITIALIZER; 18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input2 = UTEXT_INITIALIZER; 18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText empty = UTEXT_INITIALIZER; 18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status); 18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&input1); 18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); 18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&input2); 18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&empty, NULL, 0, &status); 18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? 
*/ 18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t input2Len = strlen("not abc"); 18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matcher creation and reset. 18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); 18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */ 18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(&input2); 18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == FALSE); 18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */ 18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText()); 18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(&input1); 18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
m1->reset(&empty); 18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == FALSE); 18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(utext_nativeLength(&empty) == 0); 18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // reset(pos, status) 18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(&input1); 18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(4, status); 18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(status) == TRUE); 18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(-1, status); 18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(0, status); 18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(input1Len-1, status); 18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 
18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(input1Len, status); 18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(input1Len+1, status); 19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match(pos, status) 19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(&input2); 19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(4, status) == TRUE); 19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(); 19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(3, status) == FALSE); 19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(); 19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(5, status) == FALSE); 19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(4, status) == TRUE); 19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(-1, status) == FALSE); 19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match() at end of string should 
fail, but should not 19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // be an error. 19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(input2Len, status) == FALSE); 19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match beyond end of string should fail with an error. 19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE); 19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Successful match at end of string. 19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m("A?", 0, status); // will match zero length string. 
19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&input1); 19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(input1Len, status) == TRUE); 19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&empty); 19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(0, status) == TRUE); 19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // lookingAt(pos, status) 19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m1->reset(&input2); // "not abc" 19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 
19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); 19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); 19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m1; 19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re); 19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input1); 19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input2); 19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&empty); 19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Capture Group. 
19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::start(); 19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::end(); 19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexMatcher::groupCount(); 19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re=UTEXT_INITIALIZER; 19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */ 19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re, str_01234567_pat, -1, &status); 19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = UTEXT_INITIALIZER; 19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_0123456789, -1, &status); 19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t matchStarts[] = {0, 2, 4, 8}; 19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t matchEnds[] = {10, 8, 6, 10}; 19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<4; i++) { 19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t actualStart = matcher->start(i, status); 19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualStart != matchStarts[i]) { 19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure at %s:%d, index %d. Expected %d, got %d\n", 20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __FILE__, __LINE__, i, matchStarts[i], actualStart); 20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t actualEnd = matcher->end(i, status); 20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (actualEnd != matchEnds[i]) { 20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("RegexTest failure at %s:%d index %d. 
Expected %d, got %d\n", 20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __FILE__, __LINE__, i, matchEnds[i], actualEnd); 20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); 20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->lookingAt(status); 20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString dest; 20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText destText = UTEXT_INITIALIZER; 20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&destText, &dest, &status); 20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *result; 20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test shallow-clone API 
20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t group_len; 20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group((UText *)NULL, group_len, status); 20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(0, &destText, group_len, status); 20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // destText is now immutable, reopen it 20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&destText); 20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&destText, &dest, &status); 20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(0, NULL, status); 20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(0, &destText, status); 20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(1, NULL, status); 20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */ 20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(1, &destText, status); 20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(2, NULL, status); 20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ 20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_45, result); 20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(2, &destText, status); 20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_45, result); 
20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(3, NULL, status); 20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ 20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_89, result); 20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->group(3, &destText, status); 20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_89, result); 20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&destText); 20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input); 20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
utext_close(&re); 20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find 20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re=UTEXT_INITIALIZER; 20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re, str_abc, -1, &status); 21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = UTEXT_INITIALIZER; 21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. 
*/ 21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_abcabcabc, -1, &status); 21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 6); 21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 12); 21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find() == FALSE); 21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find() == FALSE); 21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(); 21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(0, status)); 21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(1, status)); 
21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 1); 21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(2, status)); 21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 6); 21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(12, status)); 21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 12); 21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(13, status) == FALSE); 21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(16, status) == FALSE); 21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find(17, status) == FALSE); 21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->groupCount() == 0); 21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
utext_close(&input); 21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re); 21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find, with \G in pattern (true if at the end of a previous match). 21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re=UTEXT_INITIALIZER; 21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */ 21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re, str_Gabcabc, -1, &status); 21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = UTEXT_INITIALIZER; 21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. 
*/ 21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_abcabcabc, -1, &status); 21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 0); 21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(1, status) == -1); 21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(2, status) == 1); 21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->find()); 21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(status) == 4); 21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(1, status) == 4); 21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher->start(2, status) == -1); 21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input); 21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re); 21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find with zero length matches, match position should bump ahead 21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to prevent loops. 21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // using an always-true look-ahead. 22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText s = UTEXT_INITIALIZER; 22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&s, " ", -1, &status); 22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&s); 22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i++) { 22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == i); 22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==5); 22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
Check that the bump goes over characters outside the BMP OK 22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in UTF-8 22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00}; 22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&s, (char *)aboveBMP, -1, &status); 22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&s); 22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i+=4) { 22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == i); 22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==20); 22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&s); 22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find() loop breaking test. 22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // with pattern of /.?/, should see a series of one char matches, then a single 22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match of zero length at the end of the input string. 
22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".?", 0, status); 22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText s = UTEXT_INITIALIZER; 22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&s, " ", -1, &status); 22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&s); 22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i++) { 22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m.find() == FALSE) { 22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == i); 22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(i==5); 22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&s); 22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matchers with no input string behave as if they had an empty input string. 
22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(".?", 0, status); 22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.find()); 22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.start(status) == 0); 22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.input() == ""); 22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *p = RegexPattern::compile(".", 0, status); 22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m = p->matcher(status); 22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->find() == FALSE); 22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); 22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m; 22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete p; 22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regions 
22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText testPattern = UTEXT_INITIALIZER; 22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText testText = UTEXT_INITIALIZER; 22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); 22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&testPattern); 22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status); 22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&testText); 22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(&testPattern, &testText, 0, status); 22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.region(2,4, status); 22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.matches(status)); 22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(m.start(status)==2); 23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.end(status)==4); 23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(); 23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&testText, "short", -1, &status); 23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&testText); 23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&testText); 23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionStart() == 0); 23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); 23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(&m == &m.reset()); 23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&testText); 23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&testPattern); 23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // hitEnd() and requireEnd() 23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 
23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText testPattern = UTEXT_INITIALIZER; 23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText testText = UTEXT_INITIALIZER; 23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ 23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&testPattern, str_, -1, &status); 23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&testText, str_aabb, -1, &status); 23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m1(&testPattern, &testText, 0, status); 23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.lookingAt(status) == TRUE); 23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.hitEnd() == TRUE); 23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m1.requireEnd() == FALSE); 23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ 23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&testPattern, str_a, -1, &status); 23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m2(&testPattern, &testText, 0, status); 23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m2.lookingAt(status) == TRUE); 23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(m2.hitEnd() == FALSE); 23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m2.requireEnd() == FALSE); 23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ 23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); 23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m3(&testPattern, &testText, 0, status); 23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.lookingAt(status) == TRUE); 23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.hitEnd() == TRUE); 23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m3.requireEnd() == TRUE); 23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&testText); 23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&testPattern); 23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Replace_UTF8 API test for class RegexMatcher, testing the 
23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Replace family of functions. 23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Replace_UTF8() { 23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replace 23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags=0; 23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re=UTEXT_INITIALIZER; 23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&re); 23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. 
*/ 24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567 24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText dataText = UTEXT_INITIALIZER; 24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, data, -1, &status); 24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&dataText); 24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); 24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plain vanilla matches. 24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString dest; 24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText destText = UTEXT_INITIALIZER; 24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&destText, &dest, &status); 24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *result; 24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replText = UTEXT_INITIALIZER; 24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ 24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_yz, -1, &status); 24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&replText); 24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, NULL, status); 24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ 24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, &destText, status); 24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. 
*/ 24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plain vanilla non-matches. 24466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. 
*/ 24486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_abxabxabx, -1, &status); 24496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, NULL, status); 24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, &destText, status); 24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 24616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 24636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 24656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 
24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Empty source string 24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, NULL, 0, &status); 24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, NULL, status); 24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8("", result); 24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, &destText, status); 24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8("", result); 24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8("", result); 24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 
24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8("", result); 24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Empty substitution string 24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." 24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, NULL, 0, &status); 25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, NULL, status); 25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. 
*/ 25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, &destText, status); 25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ 
*/ 25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 25156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match whole string 25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_abc, -1, &status); 25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */ 25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_xyz, -1, &status); 25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, NULL, status); 25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
utext_close(result); 25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceFirst(&replText, &destText, status); 25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Capture Group, simple case 25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) 
*/ 25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re, str_add, -1, &status); 25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); 25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */ 25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_abcdefg, -1, &status); 25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); 25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ 25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_11, -1, &status); 25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, NULL, status); 25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* bcbcdefg */ 25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, &destText, status); 
25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ 25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_v, -1, &status); 25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_VERBOSE_TEXT(&replText); 25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, NULL, status); 25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */ 25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, &destText, status); 25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 
25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */ 25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); 25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, NULL, status); 25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ 25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, &destText, status); 26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */ 26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE 26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 012345678901234567890123456 26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org supplDigitChars[22] = 0xF0; 26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org supplDigitChars[23] = 0x9D; 26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org supplDigitChars[24] = 0x9F; 26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org supplDigitChars[25] = 0x8F; 26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); 26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, NULL, status); 26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */ 26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 
26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher2->replaceFirst(&replText, &destText, status); 26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */ 26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); 26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR); 26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR); 26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replacement String with \u hex escapes 26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */ 26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */ 26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); 26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_u0043, -1, &status); 26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ 26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, 
status); 26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ 26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_abc, -1, &status); 26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ 26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_U00010000, -1, &status); 26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->reset(&dataText); 26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! 
*/ // \U00010000, "LINEAR B SYLLABLE B008 A" 26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 0123456789 26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected[2] = 0xF0; 26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected[3] = 0x90; 26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected[4] = 0x80; 26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected[5] = 0x80; 26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, NULL, status); 26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(result); 26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = matcher->replaceAll(&replText, &destText, status); 26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &destText); 26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: need more thorough testing of capture substitutions. 
26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Bug 4057 26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */ 26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */ 26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ 26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re, str_ssee, -1, &status); 26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&dataText, str_blah, -1, &status); 26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&replText, str_ooh, -1, &status); 26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher m(&re, 0, status); 26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString result; 26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 
26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &result, &status); 26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Multiple finds do NOT bump up the previous appendReplacement position. 26996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(&dataText); 27006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(&resultText, &replText, status); 27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText); 27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // After a reset into the interior of a string, appendReplacement still starts at beginning. 
27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.truncate(0); 27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &result, &status); 27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(10, status); 27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(&resultText, &replText, status); 27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText); 27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // find() at interior of string, appendReplacement still starts at beginning. 
27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.truncate(0); 27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &result, &status); 27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.reset(); 27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(10, status); 27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.find(); 27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendReplacement(&resultText, &replText, status); 27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); 27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m.appendTail(&resultText, status); 27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ 27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); 
27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher2; 27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat; 27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&dataText); 27446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replText); 27456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&destText); 27466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re); 27476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 27486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// API_Pattern_UTF8 Test that the API for class RegexPattern is 27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// present and nominally working. 
27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Pattern_UTF8() { 27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern pata; // Test default constructor to not crash. 27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern patb; 27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pata == patb); 27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pata == pata); 27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re1 = UTEXT_INITIALIZER; 27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText re2 = UTEXT_INITIALIZER; 27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ 27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ 27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re1, str_abcalmz, -1, &status); 27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re2, str_def, -1, &status); 27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); 
27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); 27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1 == *pat1); 27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1 != pata); 27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Assign 27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patb = *pat1; 27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == *pat1); 27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy Construct 27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern patc(*pat1); 27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patc == *pat1); 27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == patc); 27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1 != pat2); 27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patb = *pat2; 27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb != patc); 27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(patb == *pat2); 27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile with no flags. 
27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1a = RegexPattern::compile(&re1, pe, status); 27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1a == *pat1); 27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1a->flags() == 0); 27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile with different flags should be not equal 27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status); 28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1b != *pat1a); 28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE); 28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1a->flags() == 0); 28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1b; 28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // clone 28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1c = pat1->clone(); 28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1c == *pat1); 28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(*pat1c != *pat2); 28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1c; 28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1a; 28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 
28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat2; 28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re1); 28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re2); 28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Verify that a matcher created from a cloned pattern works. 28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Jitterbug 3423) 28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText pattern = UTEXT_INITIALIZER; 28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */ 28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_pL, -1, &status); 28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); 28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pClone = pSource->clone(); 28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pSource; 28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *mFromClone = pClone->matcher(status); 28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = UTEXT_INITIALIZER; 
28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ 28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_HelloWorld, -1, &status); 28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mFromClone->reset(&input); 28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == TRUE); 28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->group(status) == "Hello"); 28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == TRUE); 28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->group(status) == "World"); 28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(mFromClone->find() == FALSE); 28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete mFromClone; 28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pClone; 28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input); 28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&pattern); 28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matches convenience API 28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText pattern = UTEXT_INITIALIZER; 28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText input = 
UTEXT_INITIALIZER; 28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ 28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_randominput, -1, &status); 28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_dotstar, -1, &status); 28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE); 28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_abc, -1, &status); 28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ 28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_nput, -1, &status); 28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_randominput, -1, &status); 28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); 28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ 28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_u, -1, &status); 28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&input, str_abc, -1, &status); 28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&pattern, str_abc, -1, &status); 28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&input); 28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&pattern); 28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Split() 29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ 29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&re1, str_spaceplus, -1, &status); 29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(&re1, pe, status); 29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString fields[10]; 29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t n; 29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 10, status); 29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now"); 29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="is"); 29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="the"); 29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="time"); 29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]==""); 29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 2, status); 29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==2); 29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now"); 29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="is the time"); 29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="the"); // left over from previous test 29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[1] = "*"; 29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("Now is the time", fields, 1, status); 29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==1); 29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="Now is the time"); 29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="*"); 29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" Now is the time ", fields, 10, status); 29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==6); 29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="Now"); 29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="is"); 29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="the"); 29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(fields[4]=="time"); 29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]==""); 29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]==""); 29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[2] = "*"; 29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" ", fields, 10, status); 29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==2); 29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]==""); 29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="*"); 29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[0] = "foo"; 29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("", fields, 10, status); 29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==0); 29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="foo"); 29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // split, with a pattern with (capture) 29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status); 29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(&re1, pe, status); 29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_CHECK_STATUS; 29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[6] = fields[7] = "*"; 29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status); 29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==7); 29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==""); 29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="c"); 29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]==""); 29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[7]=="*"); 29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status==U_ZERO_ERROR); 29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[6] = fields[7] = "*"; 29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status); 29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==7); 29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(fields[1]=="a"); 29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="c"); 29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]==""); 29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[7]=="*"); 29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[6] = "foo"; 29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c> ", fields, 6, status); 29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==6); 30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]==" "); 30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[6]=="foo"); 30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[5] = 
"foo"; 30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status); 30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time<c>"); 30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="foo"); 30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[5] = "foo"; 30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time", fields, 5, status); 30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="b"); 30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="the time"); 30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[5]=="foo"); 
30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status); 30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==4); 30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]==" "); 30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="a"); 30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="Now is "); 30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[3]=="the time<c>"); 30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status); 30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(&re1, pe, status); 30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n = pat1->split("1-10,20", fields, 10, status); 30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(n==5); 30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[0]=="1"); 30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[1]=="-"); 30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[2]=="10"); 30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT(fields[3]==","); 30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(fields[4]=="20"); 30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // RegexPattern::pattern() and patternText() 30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = new RegexPattern(); 30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(pat1->pattern() == ""); 30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status)); 30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *helloWorldInvariant = "(Hello, world)*"; 30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status); 30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pat1 = RegexPattern::compile(&re1, pe, status); 30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*"); 30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status)); 30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&re1); 30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 30776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 30786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Extended A more thorough check for features of regex patterns 30796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The test cases are in a separate data file, 30806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// source/tests/testdata/regextst.txt 30816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// A description of the test data format is included in that file. 30826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 30836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char * 30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::getPath(char buffer[2048], const char *filename) { 30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *testDataDirectory = IntlTest::getSourceTestData(status); 30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("ERROR: loadTestData() failed - %s", u_errorName(status)); 30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strcpy(buffer, testDataDirectory); 30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
strcat(buffer, filename); 30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer; 30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Extended() { 31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char tdd[2048]; 31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *srcPath; 31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lineNum = 0; 31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open and read the test data file. 31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcPath=getPath(tdd, "regextst.txt"); 31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcPath==NULL) { 31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len; 31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status); 31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Put the test data into a UnicodeString 31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testString(FALSE, testData, len); 31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status); 31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status); 31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMQvabtyYzZ2-9]*)([:letter:]*)"), 0, status); 31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status); 31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testPattern; // The pattern for test from the test file. 31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testFlags; // the flags for a test. 
31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString matchString; // The marked up string to be used as input 31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)){ 31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Construct RegexMatcher() error - %s", u_errorName(status)); 31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] testData; 31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop over the test data file, once per line. 31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (lineMat.find()) { 31436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum++; 31446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 31456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status)); 31466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 31496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testLine = lineMat.group(1, status); 31506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (testLine.length() == 0) { 31516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Parse the test line. Skip blank and comment only lines. 31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Separate out the three main fields - pattern, flags, target. 31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org commentMat.reset(testLine); 31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (commentMat.lookingAt(status)) { 31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This line is a comment, or blank. 31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pull out the pattern field, remove it from the test file line. 31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org quotedStuffMat.reset(testLine); 31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (quotedStuffMat.lookingAt(status)) { 31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testPattern = quotedStuffMat.group(2, status); 31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testLine.remove(0, quotedStuffMat.end(0, status)); 31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Bad pattern (missing quotes?) 
at %s:%d", srcPath, lineNum); 31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pull out the flags from the test file line. 31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagsMat.reset(testLine); 31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagsMat.lookingAt(status); // Will always match, possibly an empty string. 31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testFlags = flagsMat.group(1, status); 31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagsMat.group(2, status).length() > 0) { 31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Bad Match flag at line %d. Scanning %c\n", 31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum, flagsMat.group(2, status).charAt(0)); 31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testLine.remove(0, flagsMat.end(0, status)); 31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pull out the match string, as a whole. 31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We'll process the <tags> later. 
31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org quotedStuffMat.reset(testLine); 31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (quotedStuffMat.lookingAt(status)) { 31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString = quotedStuffMat.group(2, status); 31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testLine.remove(0, quotedStuffMat.end(0, status)); 31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Bad match string at test file line %d", lineNum); 32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The only thing left from the input line should be an optional trailing comment. 
32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org commentMat.reset(testLine); 32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (commentMat.lookingAt(status) == FALSE) { 32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: unexpected characters at end of test line.", lineNum); 32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Run the test 32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regex_find(testPattern, testFlags, matchString, srcPath, lineNum); 32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] testData; 32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 32266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 32276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// regex_find(pattern, flags, inputString, lineNumber) 32286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 32296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Function to run a single test from the Extended (data driven) tests. 
32306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// See file test/testdata/regextst.txt for a description of the 32316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pattern and inputString fields, and the allowed flags. 32326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// lineNumber is the source line in regextst.txt of the test. 32336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 32346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Set a value into a UVector at position specified by a decimal number in 32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// a UnicodeString. This is a utility function needed by the actual test function, 32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// which follows. 
32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void set(UVector &vec, int32_t val, UnicodeString index) { 32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t idx = 0; 32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t i=0; i<index.length(); i++) { 32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t d=u_charDigitValue(index.charAt(i)); 32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (d<0) {return;} 32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx = idx*10 + d; 32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (vec.size()<idx+1) {vec.addElement(-1, status);} 32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org vec.setElementAt(val, idx); 32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void setInt(UVector &vec, int32_t val, int32_t idx) { 32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status=U_ZERO_ERROR; 32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (vec.size()<idx+1) {vec.addElement(-1, status);} 32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org vec.setElementAt(val, idx); 32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex) 32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool couldFind = TRUE; 32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
UTEXT_SETNATIVEINDEX(utext, 0); 32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i = 0; 32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (i < unistrOffset) { 32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(utext); 32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c != U_SENTINEL) { 32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i += U16_LENGTH(c); 32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org couldFind = FALSE; 32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext); 32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return couldFind; 32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::regex_find(const UnicodeString &pattern, 32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString &flags, 32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString &inputString, 32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *srcPath, 32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t line) { 32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString unEscapedInput; 32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString deTaggedInput; 32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternUTF8Length, 
inputUTF8Length; 32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *patternChars = NULL, *inputChars = NULL; 32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText patternText = UTEXT_INITIALIZER; 32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText inputText = UTEXT_INITIALIZER; 32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *UTF8Converter = NULL; 32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *parsePat = NULL; 32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *parseMatcher = NULL; 32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; 32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; 32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector groupStarts(status); 32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector groupEnds(status); 32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector groupStartsUTF8(status); 33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector groupEndsUTF8(status); 33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isMatch = FALSE, isUTF8Match = FALSE; 33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool failed = FALSE; 33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numFinds; 33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool useMatchesFunc = FALSE; 33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool useLookingAtFunc = FALSE; 
33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t regionStart = -1; 33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t regionEnd = -1; 33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t regionStartUTF8 = -1; 33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t regionEndUTF8 = -1; 33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile the caller's pattern 33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t bflags = 0; 33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag 33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_CASE_INSENSITIVE; 33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag 33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_COMMENTS; 33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag 33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_DOTALL; 33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag 33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_MULTILINE; 33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if 
(flags.indexOf((UChar)0x65) >= 0) { // 'e' flag 33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; 33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag 33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_UNIX_LINES; 33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag 33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bflags |= UREGEX_LITERAL; 33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callerPattern = RegexPattern::compile(pattern, bflags, pe, status); 33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != U_ZERO_ERROR) { 33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #if UCONFIG_NO_BREAK_ITERATION==1 33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'v' test flag means that the test pattern should not compile if ICU was configured 33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to not include break iteration. RBBI is needed for Unicode word boundaries. 
33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #endif 33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Expected pattern compilation error. 33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("Pattern Compile returns \"%s\"", u_errorName(status)); 33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Unexpected pattern compilation error. 
33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status)); 33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Converter = ucnv_open("UTF8", &status); 33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); 33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; // buffer overflow 33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patternChars = new char[patternUTF8Length+1]; 33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); 33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); 33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_ZERO_ERROR) { 33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); 33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status != U_ZERO_ERROR) { 33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==1 33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'v' test flag means that the test pattern 
should not compile if ICU was configured 33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to not include break iteration. RBBI is needed for Unicode word boundaries. 33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Expected pattern compilation error. 33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status)); 33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Unexpected pattern compilation error. 33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: error %s compiling pattern. 
(UTF8)", line, u_errorName(status)); 33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Pattern == NULL) { 33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); 34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag 34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPatternDump(callerPattern); 34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag 34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line); 34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Number of times find() should be called on the test string, default to 1 34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org numFinds = 1; 34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=2; i<=9; i++) { 34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag 34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (numFinds != 1) { 34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: more than one digit flag. Scanning %d.", line, i); 34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org numFinds = i; 34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'M' flag. 
Use matches() instead of find() 34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x4d) >= 0) { 34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org useMatchesFunc = TRUE; 34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x4c) >= 0) { 34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org useLookingAtFunc = TRUE; 34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Find the tags in the input data, remove them, and record the group boundary 34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // positions. 34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status); 34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unEscapedInput = inputString.unescape(); 34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parseMatcher = parsePat->matcher(unEscapedInput, status); 34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(parseMatcher->find()) { 34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parseMatcher->appendReplacement(deTaggedInput, "", status); 34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString groupNum = parseMatcher->group(2, status); 
34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum == "r") { 34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // <r> or </r>, a region specification within the string 34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (parseMatcher->group(1, status) == "/") { 34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regionEnd = deTaggedInput.length(); 34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regionStart = deTaggedInput.length(); 34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // <digits> or </digits>, a group match boundary tag. 34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (parseMatcher->group(1, status) == "/") { 34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set(groupEnds, deTaggedInput.length(), groupNum); 34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set(groupStarts, deTaggedInput.length(), groupNum); 34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parseMatcher->appendTail(deTaggedInput); 34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line); 34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) { 34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("mismatched <r> tags"); 34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 
34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Configure the matcher according to the flags specified with this test. 34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher = callerPattern->matcher(deTaggedInput, status); 34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag 34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->setTrace(TRUE); 34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Pattern != NULL) { 34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); 34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; // buffer overflow 34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputChars = new char[inputUTF8Length+1]; 34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status); 34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); 34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_ZERO_ERROR) { 34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); 
34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher == NULL) { 34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line); 34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Generate native indices for UTF8 versions of region and capture group info 35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8); 35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8); 35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fill out the native index UVector info. 
35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only need 1 loop, from above we know groupStarts.size() = groupEnds.size() 35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<groupStarts.size(); i++) { 35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t start = groupStarts.elementAti(i); 35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (start >= 0) { 35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t startUTF8; 35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!utextOffsetToNative(&inputText, start, startUTF8)) { 35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: could not find native index for group start %d. UTF16 index %d", line, i, start); 35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setInt(groupStartsUTF8, startUTF8, i); 35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t end = groupEnds.elementAti(i); 35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (end >= 0) { 35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t endUTF8; 35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!utextOffsetToNative(&inputText, end, endUTF8)) { 35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: could not find native index for group end %d. UTF16 index %d", line, i, end); 35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setInt(groupEndsUTF8, endUTF8, i); 35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (regionStart>=0) { 35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->region(regionStart, regionEnd, status); 35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status); 35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x61) >= 0) { // 'a' anchoring bounds flag 35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->useAnchoringBounds(FALSE); 35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->useAnchoringBounds(FALSE); 35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag 35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->useTransparentBounds(TRUE); 35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 
35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->useTransparentBounds(TRUE); 35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Do a find on the de-tagged input using the caller's pattern 35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: error on count>1 and not find(). 35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // error on both matches() and lookingAt(). 35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<numFinds; i++) { 35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (useMatchesFunc) { 35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = matcher->matches(status); 35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isUTF8Match = UTF8Matcher->matches(status); 35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (useLookingAtFunc) { 35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = matcher->lookingAt(status); 35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isUTF8Match = UTF8Matcher->lookingAt(status); 35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 
35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = matcher->find(); 35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTF8Matcher != NULL) { 35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isUTF8Match = UTF8Matcher->find(); 35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->setTrace(FALSE); 35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d. ICU ErrorCode is %s", u_errorName(status)); 35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match up the groups from the find() with the groups from the tags 35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // number of tags should match number of groups from find operation. 35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matcher->groupCount does not include group 0, the entire match, hence the +1. 35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // G option in test means that capture group data is not available in the 35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // expected results, so the check needs to be suppressed. 
35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isMatch == FALSE && groupStarts.size() != 0) { 35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Error at line %d: Match expected, but none found.", line); 35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) { 36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: Match expected, but none found. (UTF8)", line); 36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flags.indexOf((UChar)0x47 /*G*/) >= 0) { 36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only check for match / no match. Don't check capture groups. 36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isMatch && groupStarts.size() == 0) { 36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: No match expected, but one found.", line); 36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) { 36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: No match expected, but one found. 
(UTF8)", line); 36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS_L(line); 36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<=matcher->groupCount(); i++) { 36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i)); 36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i)); 36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (matcher->start(i, status) != expectedStart) { 36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d", 36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, i, expectedStart, matcher->start(i, status)); 36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) { 36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: incorrect start position for group %d. 
Expected %d, got %d (UTF8)", 36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); 36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i)); 36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i)); 36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (matcher->end(i, status) != expectedEnd) { 36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: incorrect end position for group %d. Expected %d, got %d", 36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, i, expectedEnd, matcher->end(i, status)); 36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Error on end position; keep going; real error is probably yet to come as group 36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // end positions work from end of the input data towards the front. 36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) { 36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: incorrect end position for group %d. 
Expected %d, got %d (UTF8)", 36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, i, expectedEndUTF8, UTF8Matcher->end(i, status)); 36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Error on end position; keep going; real error is probably yet to come as group 36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // end positions work from end of the input data towards the front. 36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( matcher->groupCount()+1 < groupStarts.size()) { 36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: Expected %d capture groups, found %d.", 36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, groupStarts.size()-1, matcher->groupCount()); 36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) { 36566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: Expected %d capture groups, found %d. 
(UTF8)", 36576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org line, groupStarts.size()-1, UTF8Matcher->groupCount()); 36586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 36626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->requireEnd() == TRUE) { 36636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", line); 36646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 36666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->requireEnd() == TRUE) { 36676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line); 36686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true 36726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->requireEnd() == FALSE) { 36736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: requireEnd() returned FALSE. 
Expected TRUE", line); 36746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false 36766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->requireEnd() == FALSE) { 36776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line); 36786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 36826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->hitEnd() == TRUE) { 36836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line); 36846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 36866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->hitEnd() == TRUE) { 36876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: hitEnd() returned TRUE. 
Expected FALSE (UTF8)", line); 36886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 36926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher->hitEnd() == FALSE) { 36936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line); 36946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 36966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTF8Matcher->hitEnd() == FALSE) { 36976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error at line %d: hitEnd() returned FALSE. 
Expected TRUE (UTF8)", line); 36986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org failed = TRUE; 36996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanupAndReturn: 37036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (failed) { 37046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " 37056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); 37066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // callerPattern->dump(); 37076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete parseMatcher; 37096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete parsePat; 37106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete UTF8Matcher; 37116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete UTF8Pattern; 37126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete matcher; 37136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete callerPattern; 37146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&inputText); 37166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] inputChars; 37176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&patternText); 37186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] patternChars; 37196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_close(UTF8Converter); 37206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 37216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
37226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 37266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 37276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Errors Check for error handling in patterns. 37286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 37296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 37306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Errors() { 37316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // \escape sequences that aren't implemented yet. 37326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED); 37336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Missing close parentheses 37356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN); 37366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN); 37376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN); 37386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Extra close paren 37406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN); 37416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR(")))))))", 1, 1, 
U_REGEX_MISMATCHED_PAREN); 37426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN); 37436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look-ahead, Look-behind 37456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: add tests for unbounded length look-behinds. 37466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX); // illegal construct 37476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Attempt to use non-default flags 37496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 37506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 37516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 37526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags = UREGEX_CANON_EQ | 37536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UREGEX_COMMENTS | UREGEX_DOTALL | 37546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UREGEX_MULTILINE; 37556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status); 37566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED); 37576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pat1; 37586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Quantifiers are allowed only after something that can be quantified. 
37626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX); 37636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX); 37646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX); 37656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Mal-formed {min,max} quantifiers 37676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL); 37686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN); 37696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL); 37706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL); 37716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL); 37726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG); 37736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG); // Overflows int during scan 37746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Overflows regex binary format 37756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG); 37766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Ticket 5389 37786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); 37796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
Invalid Back Reference \0 37816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For ICU 3.8 and earlier 37826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For ICU versions newer than 3.8, \0 introduces an octal escape. 37836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 37846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); 37856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 37876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------- 37906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 37916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Read a text data file, convert it to UChars, and return the data 37926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// in one big UChar * buffer, which the caller must delete. 
37936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 37946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 37956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, 37966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *defEncoding, UErrorCode &status) { 37976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *retPtr = NULL; 37986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *fileBuf = NULL; 37996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter* conv = NULL; 38006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FILE *f = NULL; 38016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ulen = 0; 38036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 38046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retPtr; 38056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open the file. 
38096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org f = fopen(fileName, "rb"); 38116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (f == 0) { 38126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("Error opening test data file %s\n", fileName); 38136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_FILE_ACCESS_ERROR; 38146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 38156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Read it in 38186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t fileSize; 38206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t amt_read; 38216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fseek( f, 0, SEEK_END); 38236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileSize = ftell(f); 38246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBuf = new char[fileSize]; 38256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fseek(f, 0, SEEK_SET); 38266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org amt_read = fread(fileBuf, 1, fileSize, f); 38276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (amt_read != fileSize || fileSize <= 0) { 38286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Error reading test data file."); 38296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanUpAndReturn; 38306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
38336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look for a Unicode Signature (BOM) on the data just read 38346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t signatureLength; 38366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char * fileBufC; 38376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* encoding; 38386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBufC = fileBuf; 38406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org encoding = ucnv_detectUnicodeSignature( 38416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBuf, fileSize, &signatureLength, &status); 38426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(encoding!=NULL ){ 38436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBufC += signatureLength; 38446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileSize -= signatureLength; 38456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 38466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org encoding = defEncoding; 38476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (strcmp(encoding, "utf-8") == 0) { 38486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("file %s is missing its BOM", fileName); 38496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open a converter to take the rule file to UTF-16 38546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org conv = ucnv_open(encoding, &status); 
38566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 38576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanUpAndReturn; 38586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Convert the rules to UChar. 38626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Preflight first to determine required buffer size. 38636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 38646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ulen = ucnv_toUChars(conv, 38656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, // dest, 38666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, // destCapacity, 38676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBufC, 38686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileSize, 38696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &status); 38706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_BUFFER_OVERFLOW_ERROR) { 38716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Buffer Overflow is expected from the preflight operation. 
38726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 38736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retPtr = new UChar[ulen+1]; 38756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUChars(conv, 38766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retPtr, // dest, 38776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ulen+1, 38786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileBufC, 38796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileSize, 38806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &status); 38816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanUpAndReturn: 38846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fclose(f); 38856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] fileBuf; 38866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_close(conv); 38876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 38886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 38896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete []retPtr; 38906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retPtr = 0; 38916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ulen = 0; 38926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 38936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retPtr; 38946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 38956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
38976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------- 38986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 38996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// PerlTests - Run Perl's regular expression tests 39006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The input file for this test is re_tests, the standard regular 39016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// expression test data distributed with the Perl source code. 39026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 39036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Here is Perl's description of the test data file: 39046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 39056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # The tests are in a separate file 't/op/re_tests'. 39066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Each line in that file is a separate test. 39076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # There are five columns, separated by tabs. 39086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 1 contains the pattern, optionally enclosed in C<''>. 39106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Modifiers can be put after the closing C<'>. 39116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 2 contains the string to be matched. 
39136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 3 contains the expected result: 39156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # y expect a match 39166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # n expect no match 39176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # c expect an error 39186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # B test exposes a known bug in Perl, should be skipped 39196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # b test exposes a known bug in Perl, should be skipped if noamp 39206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>. 39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 4 contains a string, usually C<$&>. 39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 5 contains the expected result of double-quote 39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # interpolating that string after the match, or start of error message. 39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # Column 6, if present, contains a reason why the test is skipped. 39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # This is printed with "skipped", for harness to pick up. 39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # \n in the tests are interpolated, as are variables of the form ${\w+}. 
39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # 39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # If you want to add a regular expression test that can't be expressed 39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// # in this format, don't add it here: put it in op/pat.t instead. 39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// For ICU, if field 3 contains an 'i', the test will be skipped. 39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The test exposes is some known incompatibility between ICU and Perl regexps. 39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// (The i is in addition to whatever was there before.) 39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------- 39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PerlTests() { 39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char tdd[2048]; 39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *srcPath; 39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open and read the test data file. 
39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcPath=getPath(tdd, "re_tests.txt"); 39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcPath==NULL) { 39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len; 39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Put the test data into a UnicodeString 39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testDataString(FALSE, testData, len); 39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to break the input file into lines, and strip the new lines. 39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // One line per match, capture group one is the desired data. 
39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("RegexPattern::compile() error"); 39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* lineMat = linePat->matcher(testDataString, status); 39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to split a test file line into fields. 39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There are six fields, separated by tabs. 39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to identify test patterns with flag settings, and to separate them. 39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test patterns with flags look like 'pattern'i 39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test patterns without flags are not quoted: pattern 39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 
39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* flagMat = flagPat->matcher(status); 39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Perl tests reference several perl-isms, which are evaluated/substituted 39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in the test data. Not being perl, this must be done explicitly. Here 39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // are string constants and REs for these constructs. 39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nulnulSrc("${nulnul}"); 39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nulnul = nulnul.unescape(); 40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString ffffSrc("${ffff}"); 40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString ffff("\\uffff", -1, US_INV); 40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ffff = ffff.unescape(); 40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // regexp for $-[0], $+[2], etc. 
40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *groupsMat = groupsPat->matcher(status); 40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // regexp for $0, $1, $2, etc. 40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *cgMat = cgPat->matcher(status); 40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Main Loop for the Perl Tests, runs once per line from the 40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // test data file. 40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lineNum = 0; 40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t skippedUnimplementedCount = 0; 40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (lineMat->find()) { 40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum++; 40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Get a line, break it into its fields, do the Perl 40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // variable substitutions. 
40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString line = lineMat->group(1, status); 40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString fields[7]; 40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fieldPat->split(line, fields, 7, status); 40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagMat->reset(fields[0]); 40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagMat->matches(status); 40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern = flagMat->group(2, status); 40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace("${bang}", "!"); 40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace(ffffSrc, ffff); 40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Identify patterns that include match flag settings, 40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // split off the flags, remove the extra quotes. 
40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString flagStr = flagMat->group(3, status); 40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags = 0; 40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_c = 0x63; // Char constants for the flag letters. 40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_m = 0x6d; 40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_x = 0x78; 40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_y = 0x79; 40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_i) != -1) { 40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= UREGEX_CASE_INSENSITIVE; 40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_m) != -1) { 40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= UREGEX_MULTILINE; 40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_x) != -1) { 40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= UREGEX_COMMENTS; 40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile the test pattern. 40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 40676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status); 40686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_REGEX_UNIMPLEMENTED) { 40696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 40706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test of a feature that is planned for ICU, but not yet implemented. 40716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the test. 40726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org skippedUnimplementedCount++; 40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Some tests are supposed to generate errors. 40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only report an error for tests that are supposed to succeed. 
40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_i) >= 0) { 40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ICU should skip this test. 
40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_c) >= 0) { 40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This pattern should have caused a compilation error, but didn't/ 40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: Expected a pattern compile error, got success.", lineNum); 41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // replace the Perl variables that appear in some of the 41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match data strings. 41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString matchString = fields[1]; 41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(nulnulSrc, nulnul); 41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(ffffSrc, ffff); 41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replace any \n in the match string with an actual new-line char. 
41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Don't do full unescape, as this unescapes more than Perl does, which 41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // causes other spurious failures in the tests. 41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Run the test, check for expected match/don't match result. 41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *testMat = testPat->matcher(matchString, status); 41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool found = testMat->find(); 41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool expected = FALSE; 41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_y) >=0) { 41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected = TRUE; 41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (expected != found) { 41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: Expected %smatch, got %smatch", 41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum, expected?"":"no ", found?"":"no " ); 41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Don't try to check expected results if there is no match. 
41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Some have stuff in the expected fields) 41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!found) { 41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testMat; 41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Interpret the Perl expression from the fourth field of the data file, 41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // building up an ICU string from the results of the ICU match. 41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Perl expression will contain references to the results of 41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a regex match, including the matched string, capture group strings, 41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // group starting and ending indicies, etc. 
41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString resultString; 41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString perlExpr = fields[3]; 41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if SUPPORT_MUTATING_INPUT_STRING 41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org groupsMat->reset(perlExpr); 41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cgMat->reset(perlExpr); 41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (perlExpr.length() > 0) { 41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !SUPPORT_MUTATING_INPUT_STRING 41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Perferred usage. Reset after any modification to input string. 41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org groupsMat->reset(perlExpr); 41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cgMat->reset(perlExpr); 41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (perlExpr.startsWith("$&")) { 41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(testMat->group(status)); 41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (groupsMat->lookingAt(status)) { 41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // $-[0] $+[2] etc. 
41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString digitString = groupsMat->group(2, status); 41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t t = 0; 41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString plusOrMinus = groupsMat->group(1, status); 41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t matchPosition; 41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (plusOrMinus.compare("+") == 0) { 41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchPosition = testMat->end(groupNum, status); 41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchPosition = testMat->start(groupNum, status); 41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (matchPosition != -1) { 41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, matchPosition); 41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, groupsMat->end(status)); 41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (cgMat->lookingAt(status)) { 41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // $1, $2, $3, etc. 
41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString digitString = cgMat->group(1, status); 41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t t = 0; 41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(testMat->group(groupNum, status)); 41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, cgMat->end(status)); 41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith("@-")) { 41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<=testMat->groupCount(); i++) { 42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>0) { 42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(" "); 42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith("@+")) { 42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 
42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<=testMat->groupCount(); i++) { 42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>0) { 42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(" "); 42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // or as an escaped sequence (e.g. \n) 42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (perlExpr.length() > 1) { 42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c = perlExpr.charAt(0); 42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (c) { 42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 'n': c = '\n'; break; 42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add any other escape sequences that show up in the test expected results. 
42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(c); 42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); 42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Any characters from the perl expression that we don't explicitly 42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // recognize before here are assumed to be literals and copied 42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // as-is to the expected results. 42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(perlExpr.charAt(0)); 42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); 42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Expected Results Compare 42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString expectedS(fields[4]); 42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(nulnulSrc, nulnul); 
42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(ffffSrc, ffff); 42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (expectedS.compare(resultString) != 0) { 42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err("Line %d: Incorrect perl expression results.", lineNum); 42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testMat; 42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // All done. Clean up allocated stuff. 
42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete cgMat; 42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete cgPat; 42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete groupsMat; 42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete groupsPat; 42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete flagMat; 42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete flagPat; 42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete lineMat; 42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete linePat; 42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fieldPat; 42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] testData; 42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------- 42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// PerlTestsUTF8 Run Perl's regular expression tests on UTF-8-based UTexts 
42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// (instead of using UnicodeStrings) to test the alternate engine. 42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The input file for this test is re_tests, the standard regular 42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// expression test data distributed with the Perl source code. 42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// See PerlTests() for more information. 42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------- 42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PerlTestsUTF8() { 43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char tdd[2048]; 43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *srcPath; 43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status)); 43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText patternText = UTEXT_INITIALIZER; 43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *patternChars = NULL; 43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternLength; 43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternCapacity = 0; 43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText inputText = UTEXT_INITIALIZER; 43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *inputChars = NULL; 43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t inputLength; 43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t inputCapacity = 0; 
43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open and read the test data file. 43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcPath=getPath(tdd, "re_tests.txt"); 43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcPath==NULL) { 43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len; 43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong, error already output */ 43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Put the test data into a UnicodeString 43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString testDataString(FALSE, testData, len); 43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to break the input file into lines, and strip the new lines. 43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // One line per match, capture group one is the desired data. 43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 43416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataerrln("RegexPattern::compile() error"); 43426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 43436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* lineMat = linePat->matcher(testDataString, status); 43456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to split a test file line into fields. 43486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There are six fields, separated by tabs. 43496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 43516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Regex to identify test patterns with flag settings, and to separate them. 
43546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test patterns with flags look like 'pattern'i 43556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test patterns without flags are not quoted: pattern 43566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 43576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 43596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* flagMat = flagPat->matcher(status); 43606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Perl tests reference several perl-isms, which are evaluated/substituted 43636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in the test data. Not being perl, this must be done explicitly. Here 43646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // are string constants and REs for these constructs. 
43656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nulnulSrc("${nulnul}"); 43676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 43686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nulnul = nulnul.unescape(); 43696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString ffffSrc("${ffff}"); 43716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString ffff("\\uffff", -1, US_INV); 43726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ffff = ffff.unescape(); 43736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // regexp for $-[0], $+[2], etc. 43756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 43766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *groupsMat = groupsPat->matcher(status); 43776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // regexp for $0, $1, $2, etc. 43796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 43806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *cgMat = cgPat->matcher(status); 43816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Main Loop for the Perl Tests, runs once per line from the 43856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // test data file. 
43866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lineNum = 0; 43886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t skippedUnimplementedCount = 0; 43896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (lineMat->find()) { 43906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum++; 43916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Get a line, break it into its fields, do the Perl 43946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // variable substitutions. 43956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString line = lineMat->group(1, status); 43976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString fields[7]; 43986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fieldPat->split(line, fields, 7, status); 43996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagMat->reset(fields[0]); 44016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagMat->matches(status); 44026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern = flagMat->group(2, status); 44036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace("${bang}", "!"); 44046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 44056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.findAndReplace(ffffSrc, ffff); 44066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
Identify patterns that include match flag settings, 44096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // split off the flags, remove the extra quotes. 44106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString flagStr = flagMat->group(3, status); 44126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 44136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 44146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 44156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flags = 0; 44176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_c = 0x63; // Char constants for the flag letters. 44186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 44196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_m = 0x6d; 44206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_x = 0x78; 44216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar UChar_y = 0x79; 44226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_i) != -1) { 44236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= UREGEX_CASE_INSENSITIVE; 44246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_m) != -1) { 44266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= UREGEX_MULTILINE; 44276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (flagStr.indexOf(UChar_x) != -1) { 44296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flags |= 
UREGEX_COMMENTS; 44306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Put the pattern in a UTF-8 UText 44346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 44366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 44376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_BUFFER_OVERFLOW_ERROR) { 44386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 44396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] patternChars; 44406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patternCapacity = patternLength + 1; 44416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patternChars = new char[patternCapacity]; 44426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 44436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&patternText, patternChars, patternLength, &status); 44456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compile the test pattern. 
44486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status); 44506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_REGEX_UNIMPLEMENTED) { 44516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test of a feature that is planned for ICU, but not yet implemented. 44536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the test. 44546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org skippedUnimplementedCount++; 44556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 44566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 44576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 44586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 44616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Some tests are supposed to generate errors. 44626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only report an error for tests that are supposed to succeed. 
44636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 44646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 44656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 44666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 44676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 44696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 44706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 44716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_i) >= 0) { 44746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ICU should skip this test. 
44756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 44766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 44776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_c) >= 0) { 44806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This pattern should have caused a compilation error, but didn't/ 44816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: Expected a pattern compile error, got success.", lineNum); 44826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 44836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 44846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // replace the Perl variables that appear in some of the 44896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match data strings. 44906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 44916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString matchString = fields[1]; 44926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(nulnulSrc, nulnul); 44936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(ffffSrc, ffff); 44946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replace any \n in the match string with an actual new-line char. 
44966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Don't do full unescape, as this unescapes more than Perl does, which 44976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // causes other spurious failures in the tests. 44986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 44996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Put the input in a UTF-8 UText 45026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 45046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 45056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (status == U_BUFFER_OVERFLOW_ERROR) { 45066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 45076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] inputChars; 45086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputCapacity = inputLength + 1; 45096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputChars = new char[inputCapacity]; 45106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 45116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUTF8(&inputText, inputChars, inputLength, &status); 45136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Run the test, check for expected match/don't match result. 
45166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText); 45186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool found = testMat->find(); 45196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool expected = FALSE; 45206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fields[2].indexOf(UChar_y) >=0) { 45216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expected = TRUE; 45226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (expected != found) { 45246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("line %d: Expected %smatch, got %smatch", 45256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lineNum, expected?"":"no ", found?"":"no " ); 45266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 45276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Don't try to check expected results if there is no match. 
45306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Some have stuff in the expected fields) 45316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!found) { 45326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testMat; 45336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 45346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 45356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Interpret the Perl expression from the fourth field of the data file, 45396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // building up an ICU string from the results of the ICU match. 45406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Perl expression will contain references to the results of 45416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a regex match, including the matched string, capture group strings, 45426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // group starting and ending indicies, etc. 
45436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 45446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString resultString; 45456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString perlExpr = fields[3]; 45466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (perlExpr.length() > 0) { 45486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org groupsMat->reset(perlExpr); 45496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cgMat->reset(perlExpr); 45506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (perlExpr.startsWith("$&")) { 45526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(testMat->group(status)); 45536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 45546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (groupsMat->lookingAt(status)) { 45576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // $-[0] $+[2] etc. 
45586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString digitString = groupsMat->group(2, status); 45596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t t = 0; 45606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 45616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString plusOrMinus = groupsMat->group(1, status); 45626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t matchPosition; 45636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (plusOrMinus.compare("+") == 0) { 45646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchPosition = testMat->end(groupNum, status); 45656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 45666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matchPosition = testMat->start(groupNum, status); 45676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (matchPosition != -1) { 45696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, matchPosition); 45706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, groupsMat->end(status)); 45726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (cgMat->lookingAt(status)) { 45756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // $1, $2, $3, etc. 
45766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString digitString = cgMat->group(1, status); 45776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t t = 0; 45786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 45796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 45806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(testMat->group(groupNum, status)); 45816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 45826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, cgMat->end(status)); 45846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith("@-")) { 45876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 45886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<=testMat->groupCount(); i++) { 45896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>0) { 45906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(" "); 45916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 45936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 45956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith("@+")) { 45986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 
45996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<=testMat->groupCount(); i++) { 46006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>0) { 46016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(" "); 46026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 46046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 2); 46066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 46096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // or as an escaped sequence (e.g. \n) 46106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (perlExpr.length() > 1) { 46116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 46126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c = perlExpr.charAt(0); 46146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (c) { 46156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 'n': c = '\n'; break; 46166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add any other escape sequences that show up in the test expected results. 
46176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(c); 46196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); 46206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 46236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Any characters from the perl expression that we don't explicitly 46246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // recognize before here are assumed to be literals and copied 46256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // as-is to the expected results. 46266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultString.append(perlExpr.charAt(0)); 46276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org perlExpr.remove(0, 1); 46286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 46316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 46326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 46376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Expected Results Compare 46386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 46396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString expectedS(fields[4]); 46406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(nulnulSrc, nulnul); 
46416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(ffffSrc, ffff); 46426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 46436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (expectedS.compare(resultString) != 0) { 46466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err("Line %d: Incorrect perl expression results.", lineNum); 46476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 46486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testMat; 46516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete testPat; 46526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 46556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // All done. Clean up allocated stuff. 
46566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 46576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete cgMat; 46586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete cgPat; 46596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete groupsMat; 46616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete groupsPat; 46626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete flagMat; 46646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete flagPat; 46656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete lineMat; 46676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete linePat; 46686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fieldPat; 46706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] testData; 46716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&patternText); 46736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&inputText); 46746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] patternChars; 46766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete [] inputChars; 46776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 46806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
46826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------- 46856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 46866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug6149 Verify limits to heap expansion for backtrack stack. 46876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Use this pattern, 46886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// "(a?){1,8000000}" 46896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: was an unbounded upperbounds, but that now has loop-breaking enabled. 46906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This test is likely to be fragile, as further optimizations stop 46916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// more cases of pointless looping in the match engine. 46926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 46936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------- 46946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug6149() { 46956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern("(a?){1,8000000}"); 46966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s("xyz"); 46976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t flags = 0; 46986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 46996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher(pattern, s, flags, status); 47016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool result = false; 47026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW); 47036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == FALSE); 47046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 47086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Callbacks() Test the callback function. 47096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// When set, callbacks occur periodically during matching operations, 47106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// giving the application code the ability to abort the operation 47116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// before it's normal completion. 47126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 47136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct callBackContext { 47156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexTest *test; 47166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCalls; 47176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numCalls; 47186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lastSteps; 47196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;}; 47206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 47216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 47236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 47246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtestCallBackFn(const void *context, int32_t steps) { 47256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
callBackContext *info = (callBackContext *)context; 47266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (info->lastSteps+1 != steps) { 47276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info->test->errln("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 47286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info->lastSteps = steps; 47306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info->numCalls++; 47316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (info->numCalls < info->maxCalls); 47326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 47336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 47346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Callbacks() { 47366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 47376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Getter returns NULLs if no callback has been set 47386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The variables that the getter will fill in. 47406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Init to non-null values so that the action of the getter can be seen. 
47416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *returnedContext = &returnedContext; 47426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URegexMatchCallback *returnedFn = &testCallBackFn; 47436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 47456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("x", 0, status); 47466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.getMatchCallback(returnedFn, returnedContext, status); 47486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedFn == NULL); 47506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedContext == NULL); 47516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 47546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set and Get work 47556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callBackContext cbInfo = {this, 0, 0, 0}; 47566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *returnedContext; 47576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URegexMatchCallback *returnedFn; 47586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 47596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 
47606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setMatchCallback(testCallBackFn, &cbInfo, status); 47626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.getMatchCallback(returnedFn, returnedContext, status); 47646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedFn == testCallBackFn); 47666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedContext == &cbInfo); 47676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A short-running match shouldn't invoke the callback 47696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 47706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(1); 47716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s = "xxx"; 47726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s); 47736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.matches(status)); 47746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls == 0); 47766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A medium-length match that runs long enough to invoke the 47786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // callback, but not so long that the callback aborts it. 
47796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 47806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(4); 47816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = "aaaaaaaaaaaaaaaaaaab"; 47826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s); 47836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.matches(status)==FALSE); 47846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 47856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls > 0); 47866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A longer running match that the callback function will abort. 47886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 47896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(4); 47906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = "aaaaaaaaaaaaaaaaaaaaaaab"; 47916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s); 47926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.matches(status)==FALSE); 47936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 47946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls == 4); 47956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 47996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 48026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// FindProgressCallbacks() Test 
the find "progress" callback function. 48036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// When set, the find progress callback will be invoked during a find operations 48046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// after each return from a match attempt, giving the application the opportunity 48056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// to terminate a long-running find operation before it's normal completion. 48066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 48076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct progressCallBackContext { 48096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexTest *test; 48106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t lastIndex; 48116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCalls; 48126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numCalls; 48136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; 48146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 48156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 48176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 48186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtestProgressCallBackFn(const void *context, int64_t matchIndex) { 48196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org progressCallBackContext *info = (progressCallBackContext *)context; 48206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info->numCalls++; 48216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info->lastIndex = matchIndex; 48226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, 
info->numCalls); 48236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (info->numCalls < info->maxCalls); 48246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 48256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 48266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::FindProgressCallbacks() { 48286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Getter returns NULLs if no callback has been set 48306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The variables that the getter will fill in. 48326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Init to non-null values so that the action of the getter can be seen. 48336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *returnedContext = &returnedContext; 48346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; 48356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 48376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher("x", 0, status); 48386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.getFindProgressCallback(returnedFn, returnedContext, status); 48406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedFn == NULL); 48426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedContext == NULL); 48436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
48446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set and Get work 48476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org progressCallBackContext cbInfo = {this, 0, 0, 0}; 48486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *returnedContext; 48496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URegexFindProgressCallback *returnedFn; 48506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 48516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 48526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); 48546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.getFindProgressCallback(returnedFn, returnedContext, status); 48566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedFn == testProgressCallBackFn); 48586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(returnedContext == &cbInfo); 48596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A short-running match should NOT invoke the callback. 
48616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 48626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(100); 48636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s = "abxxx"; 48646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s); 48656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 48666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.setTrace(TRUE); 48676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 48686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.find(0, status)); 48696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls == 0); 48716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A medium running match that causes matcher.find() to invoke our callback for each index. 
48736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 48746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = "aaaaaaaaaaaaaaaaaaab"; 48756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string 48766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s); 48776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.find(0, status)==FALSE); 48786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); 48806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. 48826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 48836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; 48846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string 48856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s1); 48866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.find(0, status)==FALSE); 48876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); 48896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 48916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Now a match that will succeed, but after an interruption 
48926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 48936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; 48946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string 48956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org matcher.reset(s2); 48966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.find(0, status)==FALSE); 48976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 48986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Now retry the match from where left off 48996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbInfo.maxCalls = 100; // No callback limit 49006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); 49016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 49026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 49036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 49076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 49106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 49116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// PreAllocatedUTextCAPI Check the C API with pre-allocated mutable 49126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// UTexts. 
The pure-C implementation of UText 49136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// has no mutable backing stores, but we can 49146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// use UnicodeString here to test the functionality. 49156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 49166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------- 49176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PreAllocatedUTextCAPI () { 49186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 49196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URegularExpression *re; 49206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText patternText = UTEXT_INITIALIZER; 49216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString buffer; 49226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText bufferText = UTEXT_INITIALIZER; 49236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&bufferText, &buffer, &status); 49256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 49276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * getText() and getUText() 49286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 49296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 49306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText text1 = UTEXT_INITIALIZER; 49316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText text2 = UTEXT_INITIALIZER; 49326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text2Chars[20]; 49336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *resultText; 49346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
49356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 49366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); 49376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); 49386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); 49396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&text2, text2Chars, -1, &status); 49406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); 49426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org re = uregex_openUText(&patternText, 0, NULL, &status); 49436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* First set a UText */ 49456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setUText(re, &text1, &status); 49466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultText = uregex_getUText(re, &bufferText, &status); 49476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 49486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(resultText == &bufferText); 49496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(resultText, 0); 49506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(&text1, 0); 49516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(testUTextEqual(resultText, &text1)); 49526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultText = uregex_getUText(re, &bufferText, &status); 49546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
49556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(resultText == &bufferText); 49566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(resultText, 0); 49576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(&text1, 0); 49586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(testUTextEqual(resultText, &text1)); 49596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Then set a UChar * */ 49616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text2Chars, 7, &status); 49626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resultText = uregex_getUText(re, &bufferText, &status); 49636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 49646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(resultText == &bufferText); 49656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(resultText, 0); 49666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(&text2, 0); 49676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(testUTextEqual(resultText, &text2)); 49686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_close(re); 49706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&text1); 49716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&text2); 49726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 49756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * group() 49766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 49776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 
49786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text1[80]; 49796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *actual; 49806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool result; 49816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 49826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 49846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org re = uregex_openC("abc(.*?)def", 0, NULL, &status); 49856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 49866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text1, -1, &status); 49886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_find(re, 0, &status); 49896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result==TRUE); 49906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Capture Group 0, the full match. Should succeed. */ 49926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 49936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actual = uregex_groupUTextDeep(re, 0, &bufferText, &status); 49946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 49956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(actual == &bufferText); 49966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual); 49976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Capture group #1. Should succeed. 
*/ 49996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 50006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actual = uregex_groupUTextDeep(re, 1, &bufferText, &status); 50016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(actual == &bufferText); 50036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual); 50046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Capture group out of range. Error. */ 50066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 50076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); 50086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 50096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(actual == &bufferText); 50106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_close(re); 50126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 50166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * replaceFirst() 50176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 50186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 50196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text1[80]; 50206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text2[80]; 50216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replText = UTEXT_INITIALIZER; 
50226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *result; 50236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 50256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 50266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 50276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 50286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org re = uregex_openC("x(.*?)x", 0, NULL, &status); 50306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Normal case, with match */ 50336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text1, -1, &status); 50346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 50356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 50366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &bufferText); 50386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); 50396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* No match. Text should copy to output with no changes. 
*/ 50416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text2, -1, &status); 50426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 50436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 50446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &bufferText); 50466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 50476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Unicode escapes */ 50496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text1, -1, &status); 50506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); 50516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 50526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 50536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &bufferText); 50556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); 50566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_close(re); 50586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replText); 50596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
50606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 50636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * replaceAll() 50646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 50656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 50666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text1[80]; 50676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar text2[80]; 50686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replText = UTEXT_INITIALIZER; 50696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *result; 50706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 50726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 50736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 50746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 50756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org re = uregex_openC("x(.*?)x", 0, NULL, &status); 50776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Normal case, with match */ 50806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text1, -1, &status); 50816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 50826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_replaceAllUText(re, 
&replText, &bufferText, &status); 50836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &bufferText); 50856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result); 50866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* No match. Text should copy to output with no changes. */ 50886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_setText(re, text2, -1, &status); 50896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 50906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = uregex_replaceAllUText(re, &replText, &bufferText, &status); 50916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 50926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(result == &bufferText); 50936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 50946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uregex_close(re); 50966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replText); 50976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 51016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts, 51026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * so we don't need to test it here. 
51036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 51046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&bufferText); 51066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&patternText); 51076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 51086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------- 51106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 51116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug7651 Regex pattern that exceeds default operator stack depth in matcher. 51126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 51136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------- 51146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7651() { 51156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)"); 51166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData. 51176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation. 
51186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)"); 51196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s("#ff @abcd This is test"); 51206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexPattern *REPattern = NULL; 51216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *REMatcher = NULL; 51226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 51236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 51246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REPattern = RegexPattern::compile(pattern1, 0, pe, status); 51266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REMatcher = REPattern->matcher(s, status); 51286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(REMatcher->find()); 51306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(REMatcher->start(status) == 0); 51316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REPattern; 51326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REMatcher; 51336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 51346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REPattern = RegexPattern::compile(pattern2, 0, pe, status); 51366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 
51376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REMatcher = REPattern->matcher(s, status); 51386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(REMatcher->find()); 51406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(REMatcher->start(status) == 0); 51416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REPattern; 51426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete REMatcher; 51436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 51446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7740() { 51476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 51486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern = "(a)"; 51496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString text = "abcdef"; 51506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher *m = new RegexMatcher(pattern, text, 0, status); 51516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(m->lookingAt(status)); 51536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 51556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString s = m->group(1, status); // Bug 7740: segfault here. 
51566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 51576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(s == ""); 51586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete m; 51596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 51606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug 8479: was crashing whith a Bogus UnicodeString as input. 51626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug8479() { 51646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 51656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status); 51676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) 51696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 51706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString str; 51716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org str.setToBogus(); 51726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pMatcher->reset(str); 51736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 51746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pMatcher->matches(status); 51756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 51766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pMatcher; 51776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
51796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug 7029 51826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7029() { 51836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 51846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); 51866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString text = "abc.def"; 51876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString splits[10]; 51886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numFields = pMatcher->split(text, splits, 10, status); 51906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 51916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(numFields == 8); 51926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete pMatcher; 51936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 51946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug 9283 51966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This test is checking for the existance of any supplemental characters that case-fold 51976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// to a bmp character. 51986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 51996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// At the time of this writing there are none. 
If any should appear in a subsequent release 52006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// of Unicode, the code in regular expressions compilation that determines the longest 52016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// posssible match for a literal string will need to be enhanced. 52026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 52036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() 52046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// for details on what to do in case of a failure of this test. 52056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 52066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug9283() { 52076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 52086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status); 52096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_CHECK_STATUS; 52106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t index; 52116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 52126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (index=0; ; index++) { 52136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = supplementalsWithCaseFolding.charAt(index); 52146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == -1) { 52156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString cf = UnicodeString(c).foldCase(); 52186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_ASSERT(cf.length() >= 2); 52196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
52206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 52216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::CheckInvBufSize() { 52246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inv_next>=INV_BUFSIZ) { 52256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n", 52266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org __FILE__, INV_BUFSIZ, inv_next); 52276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 52286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); 52296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 52316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 52336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5234