1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 3103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Copyright (c) 2002-2012, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// regextst.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// ICU Regular Expressions test, part of intltest. 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho NOTE!! 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho PLEASE be careful about ASCII assumptions in this test. 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho This test is one of the worst repeat offenders. 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho If you have questions, contact someone on the ICU PMC 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho who has access to an EBCDIC system. 
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "intltest.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uniset.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regextst.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 3727f654740f2a26ad62a5c155af9199af9e69b889claireho#include "cstring.h" 3827f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uinvchar.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define SUPPORT_MUTATING_INPUT_STRING 0 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test class boilerplate 
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::RegexTest() 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::~RegexTest() 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) logln("TestSuite RegexTest: "); 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: name = "Basic"; 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Basic(); 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: name = "API_Match"; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Match(); 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: name = "API_Replace"; 
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Replace(); 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: name = "API_Pattern"; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Pattern(); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 4: 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho name = "Extended"; 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Extended(); 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho name = "skip"; 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: name = "Errors"; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Errors(); 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: name = "PerlTests"; 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) PerlTests(); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 7: name = "Callbacks"; 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) Callbacks(); 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 9227f654740f2a26ad62a5c155af9199af9e69b889claireho case 8: name = "FindProgressCallbacks"; 9327f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) FindProgressCallbacks(); 9427f654740f2a26ad62a5c155af9199af9e69b889claireho break; 9527f654740f2a26ad62a5c155af9199af9e69b889claireho case 9: name = "Bug 6149"; 
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) Bug6149(); 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 9827f654740f2a26ad62a5c155af9199af9e69b889claireho case 10: name = "UTextBasic"; 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) UTextBasic(); 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10127f654740f2a26ad62a5c155af9199af9e69b889claireho case 11: name = "API_Match_UTF8"; 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Match_UTF8(); 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10427f654740f2a26ad62a5c155af9199af9e69b889claireho case 12: name = "API_Replace_UTF8"; 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Replace_UTF8(); 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10727f654740f2a26ad62a5c155af9199af9e69b889claireho case 13: name = "API_Pattern_UTF8"; 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Pattern_UTF8(); 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11027f654740f2a26ad62a5c155af9199af9e69b889claireho case 14: name = "PerlTestsUTF8"; 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) PerlTestsUTF8(); 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11327f654740f2a26ad62a5c155af9199af9e69b889claireho case 15: name = "PreAllocatedUTextCAPI"; 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) PreAllocatedUTextCAPI(); 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11627f654740f2a26ad62a5c155af9199af9e69b889claireho case 16: name = "Bug 7651"; 11727f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) Bug7651(); 11827f654740f2a26ad62a5c155af9199af9e69b889claireho break; 11927f654740f2a26ad62a5c155af9199af9e69b889claireho case 17: name = "Bug 7740"; 12027f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) Bug7740(); 12127f654740f2a26ad62a5c155af9199af9e69b889claireho break; 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 18: 
name = "Bug 8479"; 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) Bug8479(); 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 19: name = "Bug 7029"; 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) Bug7029(); 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 20: name = "CheckInvBufSize"; 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) CheckInvBufSize(); 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 21: name = "Bug 9283"; 132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (exec) Bug9283(); 133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; //needed to end loop 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 14227f654740f2a26ad62a5c155af9199af9e69b889claireho/** 14327f654740f2a26ad62a5c155af9199af9e69b889claireho * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage 14427f654740f2a26ad62a5c155af9199af9e69b889claireho * into ASCII. 
14527f654740f2a26ad62a5c155af9199af9e69b889claireho * @see utext_openUTF8 14627f654740f2a26ad62a5c155af9199af9e69b889claireho */ 14727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); 14827f654740f2a26ad62a5c155af9199af9e69b889claireho 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Error Checking / Reporting macros used in all of the tests. 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) { 15627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t oldIndex = utext_getNativeIndex(text); 15727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(text, 0); 15827f654740f2a26ad62a5c155af9199af9e69b889claireho char *bufPtr = buf; 15927f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c = utext_next32From(text, 0); 16027f654740f2a26ad62a5c155af9199af9e69b889claireho while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) { 16127f654740f2a26ad62a5c155af9199af9e69b889claireho if (0x000020<=c && c<0x00007e) { 16227f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = c; 16327f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 16427f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 16527f654740f2a26ad62a5c155af9199af9e69b889claireho sprintf(bufPtr,"U+%04X", c); 16627f654740f2a26ad62a5c155af9199af9e69b889claireho bufPtr+= strlen(bufPtr)-1; 16727f654740f2a26ad62a5c155af9199af9e69b889claireho#else 
16827f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = '%'; 16927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 17027f654740f2a26ad62a5c155af9199af9e69b889claireho } 17127f654740f2a26ad62a5c155af9199af9e69b889claireho bufPtr++; 17227f654740f2a26ad62a5c155af9199af9e69b889claireho c = UTEXT_NEXT32(text); 17327f654740f2a26ad62a5c155af9199af9e69b889claireho } 17427f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = 0; 17527f654740f2a26ad62a5c155af9199af9e69b889claireho#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) 17627f654740f2a26ad62a5c155af9199af9e69b889claireho char *ebuf = (char*)malloc(bufLen); 17727f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen); 17827f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_strncpy(buf, ebuf, bufLen); 17927f654740f2a26ad62a5c155af9199af9e69b889claireho free((void*)ebuf); 18027f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 18127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(text, oldIndex); 18227f654740f2a26ad62a5c155af9199af9e69b889claireho} 18327f654740f2a26ad62a5c155af9199af9e69b889claireho 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char ASSERT_BUF[1024]; 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char* RegexTest::extractToAssertBuf(const UnicodeString& message) { 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(message.length()==0) { 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strcpy(ASSERT_BUF, "[[empty UnicodeString]]"); 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString buf; 192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius IntlTest::prettify(message,buf); 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(buf.length()==0) { 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 
strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]"); 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1); 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ASSERT_BUF[0]==0) { 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ASSERT_BUF[0]=0; 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(int32_t i=0;i<buf.length();i++) { 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar ch = buf[i]; 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return ASSERT_BUF; 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 21127f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);} 21227f654740f2a26ad62a5c155af9199af9e69b889claireho 21327f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. 
status=%s", \ 21427f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, u_errorName(status)); return;}} 21527f654740f2a26ad62a5c155af9199af9e69b889claireho 21627f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\ 2196d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queruif (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=%s, got %s", \ 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, u_errorName(errcode), u_errorName(status));};} 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \ 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "RegexTest failure at line %d, from %d. 
status=%d\n",__LINE__, (line), status); }} 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \ 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}} 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};} 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 231103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic UBool testUTextEqual(UText *uta, UText *utb) { 232103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 ca = 0; 233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 cb = 0; 234103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius utext_setNativeIndex(uta, 0); 235103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius utext_setNativeIndex(utb, 0); 236103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius do { 237103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ca = utext_next32(uta); 238103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius cb = utext_next32(utb); 239103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (ca != cb) { 240103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 242103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } while (ca != U_SENTINEL); 243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return ca == cb; 244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 
24727f654740f2a26ad62a5c155af9199af9e69b889claireho/** 24827f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected expected text in UTF-8 (not platform) codepage 24927f654740f2a26ad62a5c155af9199af9e69b889claireho */ 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) { 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText expectedText = UTEXT_INITIALIZER; 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&expectedText, expected, -1, &status); 25427f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(status)) { 25527f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 25627f654740f2a26ad62a5c155af9199af9e69b889claireho return; 25727f654740f2a26ad62a5c155af9199af9e69b889claireho } 25827f654740f2a26ad62a5c155af9199af9e69b889claireho if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) { 25927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: expected is %d utf-8 bytes, but utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected)); 26027f654740f2a26ad62a5c155af9199af9e69b889claireho return; 26127f654740f2a26ad62a5c155af9199af9e69b889claireho } 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(actual, 0); 263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!testUTextEqual(&expectedText, actual)) { 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char buf[201 /*21*/]; 26527f654740f2a26ad62a5c155af9199af9e69b889claireho char expectedBuf[201]; 26627f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 26727f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(expectedBuf, 
sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 26827f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&expectedText); 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 27227f654740f2a26ad62a5c155af9199af9e69b889claireho/** 27327f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected invariant (platform local text) input 27427f654740f2a26ad62a5c155af9199af9e69b889claireho */ 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27627f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) { 27727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 27827f654740f2a26ad62a5c155af9199af9e69b889claireho UText expectedText = UTEXT_INITIALIZER; 27927f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status); 28027f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(status)) { 28127f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 28227f654740f2a26ad62a5c155af9199af9e69b889claireho return; 28327f654740f2a26ad62a5c155af9199af9e69b889claireho } 28427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(actual, 0); 285103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!testUTextEqual(&expectedText, actual)) { 28627f654740f2a26ad62a5c155af9199af9e69b889claireho char buf[201 /*21*/]; 28727f654740f2a26ad62a5c155af9199af9e69b889claireho char expectedBuf[201]; 
28827f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 28927f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 29027f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 29127f654740f2a26ad62a5c155af9199af9e69b889claireho } 29227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_close(&expectedText); 29327f654740f2a26ad62a5c155af9199af9e69b889claireho} 29427f654740f2a26ad62a5c155af9199af9e69b889claireho 29527f654740f2a26ad62a5c155af9199af9e69b889claireho/** 29627f654740f2a26ad62a5c155af9199af9e69b889claireho * Assumes utf-8 input 29727f654740f2a26ad62a5c155af9199af9e69b889claireho */ 29827f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__) 29927f654740f2a26ad62a5c155af9199af9e69b889claireho/** 30027f654740f2a26ad62a5c155af9199af9e69b889claireho * Assumes Invariant input 30127f654740f2a26ad62a5c155af9199af9e69b889claireho */ 30227f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__) 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This buffer ( inv_buf ) is used to hold the UTF-8 strings 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * passed into utext_openUTF8. An error will be given if 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * INV_BUFSIZ is too small. It's only used on EBCDIC systems. 
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define INV_BUFSIZ 2048 /* increase this if too small */ 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 31254dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic int64_t inv_next=0; 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY!=U_ASCII_FAMILY 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char inv_buf[INV_BUFSIZ]; 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) { 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length==-1) length=strlen(inv); 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY==U_ASCII_FAMILY 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inv_next+=length; 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return utext_openUTF8(ut, inv, length, status); 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#else 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(inv_next+length+1>INV_BUFSIZ) { 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n", 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1)); 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_MEMORY_ALLOCATION_ERROR; 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho unsigned char *buf = (unsigned char*)inv_buf+inv_next; 
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_aestrncpy(buf, (const uint8_t*)inv, length); 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inv_next+=length; 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if 0 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next); 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return utext_openUTF8(ut, (const char*)buf, length, status); 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_TESTLM Macro + invocation function to simplify writing quick tests 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for the LookingAt() and Match() functions. 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// usage: 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_TESTLM("pattern", "input text", lookingAt expected, matches expected); 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The expected results are UBool - TRUE or FALSE. 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The input text is unescaped. The pattern is not. 
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define REGEX_TESTLM(pat, text, looking, match) {doRegexLMTest(pat, text, looking, match, __LINE__);doRegexLMTestUTF8(pat, text, looking, match, __LINE__);} 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString pattern(pat, -1, US_INV); 362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString inputText(text, -1, US_INV); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *REPattern = NULL; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *REMatcher = NULL; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool retVal = TRUE; 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString patString(pat, -1, US_INV); 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REPattern = RegexPattern::compile(patString, 0, pe, status); 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 3726d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("RegexTest failure in RegexPattern::compile() at line %d. 
Status = %s", 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (line==376) { RegexPatternDump(REPattern);} 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString inputString(inputText); 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString unEscapedInput = inputString.unescape(); 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REMatcher = REPattern->matcher(unEscapedInput, status); 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in REPattern::matcher() at line %d. Status = %s\n", 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool actualmatch; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualmatch = REMatcher->lookingAt(status); 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in lookingAt() at line %d. 
Status = %s\n", 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualmatch != looking) { 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest: wrong return from lookingAt() at line %d.\n", line); 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualmatch = REMatcher->matches(status); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in matches() at line %d. 
Status = %s\n", 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualmatch != match) { 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest: wrong return from matches() at line %d.\n", line); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retVal == FALSE) { 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPatternDump(REPattern); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete REPattern; 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete REMatcher; 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retVal; 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputUTF8Length; 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *textChars = NULL; 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *REPattern = NULL; 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *REMatcher = NULL; 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool retVal = TRUE; 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43227f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(&pattern, 0, pe, status); 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8). Status = %s\n", 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString inputString(text, -1, US_INV); 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unEscapedInput = inputString.unescape(); 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status); 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen 
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("RegexTest unable to convert input to UTF8 at line %d. Status = %s\n", line, u_errorName(status)); 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; // not a failure of the Regex engine 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho textChars = new char[inputUTF8Length+1]; 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status); 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REMatcher = &REPattern->matcher(status)->reset(&inputText); 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Status = %s\n", 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool actualmatch; 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho actualmatch = REMatcher->lookingAt(status); 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in lookingAt() at line %d (UTF8). 
Status = %s\n", 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualmatch != looking) { 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line); 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho actualmatch = REMatcher->matches(status); 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n", 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualmatch != match) { 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line); 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (retVal == FALSE) { 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPatternDump(REPattern); 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] textChars; 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_ERR Macro + invocation function to simplify writing tests 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// regex tests for incorrect patterns 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// usage: 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_ERR("pattern", expected error line, column, expected status); 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__); 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol, 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode expectedStatus, int32_t line) { 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern(pat); 
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *callerPattern = NULL; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile the caller's pattern 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString patString(pat); 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru callerPattern = RegexPattern::compile(patString, 0, pe, status); 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != expectedStatus) { 5266d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != U_ZERO_ERROR) { 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pe.line != errLine || pe.offset != errCol) { 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Line %d: incorrect line/offset from UParseError. 
Expected %d/%d; got %d/%d.\n", 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, errLine, errCol, pe.line, pe.offset); 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete callerPattern; 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile again, using a UTF-8-based UText 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 54227f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&patternText, pat, -1, &status); 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callerPattern = RegexPattern::compile(&patternText, 0, pe, status); 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != expectedStatus) { 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pe.line != errLine || pe.offset != errCol) { 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: incorrect line/offset from UParseError. 
Expected %d/%d; got %d/%d.\n", 55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, errLine, errCol, pe.line, pe.offset); 55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete callerPattern; 55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Basic Check for basic functionality of regex pattern matching. 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Avoid the use of REGEX_FIND test macro, which has 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// substantial dependencies on basic Regex functionality. 
//
//---------------------------------------------------------------------------
void RegexTest::Basic() {
    // Every REGEX_TESTLM(pattern, text, looking, match) line below is one test case:
    //    "looking" is the expected result of RegexMatcher::lookingAt() and "match"
    //    the expected result of RegexMatcher::matches(). The text is unescaped before
    //    it is matched, which is why inputs may be written with \\uxxxx / \\Uxxxxxxxx.
    //    The macro reports failures by source line, so the order and position of
    //    the cases carries no meaning beyond readability.


//
// Debug - slide failing test cases early
//
// (Disabled scratch area. When enabled it runs the one-off experiment below and
//  then terminates the whole test program via exit(1).)
//
#if 0
    {
        // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);
        UParseError pe;
        UErrorCode  status = U_ZERO_ERROR;
        RegexPattern *pattern;
        pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
        RegexPatternDump(pattern);
        RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
        UBool result = m->find();
        printf("result = %d\n", result);
        // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");
        // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX====================");
    }
    exit(1);
#endif


    //
    // Pattern with parentheses
    //
    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE, FALSE);
    REGEX_TESTLM("st(abc)ring", "stabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)ring", "stabcrung", FALSE, FALSE);

    //
    // Patterns with *
    //
    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);

    REGEX_TESTLM("a*", "", TRUE, TRUE);
    REGEX_TESTLM("a*", "b", TRUE, FALSE);


    //
    // Patterns with "."
    //
    REGEX_TESTLM(".", "abc", TRUE, FALSE);
    REGEX_TESTLM("...", "abc", TRUE, TRUE);
    REGEX_TESTLM("....", "abc", FALSE, FALSE);
    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);

    //
    // Patterns with * applied to chars at end of literal string
    //
    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);

    //
    // Supplemental chars match as single chars, not a pair of surrogates.
    //
    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);


    //
    // UnicodeSets in the pattern
    //
    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);

    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE); // note that * matches 0 occurrences.
    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab ", TRUE, TRUE);

    //
    // OR operator in patterns
    //
    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
    REGEX_TESTLM("a|b", "b", TRUE, TRUE);

    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);

    //
    // +
    //
    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
    REGEX_TESTLM("b+", "", FALSE, FALSE);
    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);

    //
    // ?
    //
    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);

    //
    // Escape sequences that become single literal chars, handled internally
    // by ICU's Unescape.
    //

    // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE); // Octal TODO: not implemented yet.
    REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE); // BEL
    REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE); // Control-L
    REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE); // Escape
    REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE); // Form Feed
    REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE); // new line
    REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE); // CR
    REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE); // Tab
    REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE);
    REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE);

    REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE); // \A matches only at the beginning of input
    REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE); // \A matches only at the beginning of input

    // Escape of special chars in patterns
    REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE);
}


//---------------------------------------------------------------------------
//
//      UTextBasic   Check for quirks that are specific to the UText
//                   implementation.
//
//---------------------------------------------------------------------------
void RegexTest::UTextBasic() {
    // The text is spelled out as byte values rather than as a string literal,
    //   presumably so that it is valid UTF-8 even where the compiler's execution
    //   character set is not ASCII based (see the EBCDIC note at the top of the file).
    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
    UErrorCode status = U_ZERO_ERROR;
    UText pattern = UTEXT_INITIALIZER;
    utext_openUTF8(&pattern, str_abc, -1, &status);
    RegexMatcher matcher(&pattern, 0, status);
    // NOTE(review): REGEX_CHECK_STATUS and REGEX_ASSERT_UTEXT_UTF8 are defined outside
    //   this part of the file. If REGEX_CHECK_STATUS returns on failure, the UTexts
    //   opened here are not closed on that path - confirm against the macro.
    REGEX_CHECK_STATUS;

    // Same bytes as the pattern, now used as the input text.
    UText input = UTEXT_INITIALIZER;
    utext_openUTF8(&input, str_abc, -1, &status);
    REGEX_CHECK_STATUS;
    matcher.reset(&input);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

    // Reset the matcher with the UText that it is already using as its input,
    //   and check that the input text is still the same afterwards.
    matcher.reset(matcher.inputText());
    REGEX_CHECK_STATUS;
    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

    utext_close(&pattern);
    utext_close(&input);
}


//---------------------------------------------------------------------------
//
//      API_Match   Test that the API for class RegexMatcher
//                  is present and nominally working, but excluding functions
//                  implementing replace operations.
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Match() { 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags = 0; 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Debug - slide failing test cases early 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Simple pattern compilation 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat2; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat2 = RegexPattern::compile(re, flags, pe, status); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString inStr1 = "abcdef this is a test"; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString instr2 = "not abc"; 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString empty = ""; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matcher creation and reset. 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m1 = pat2->matcher(inStr1, status); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == FALSE); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == instr2); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(inStr1); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(empty); 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == FALSE); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == empty); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
REGEX_ASSERT(&m1->pattern() == pat2); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reset(pos, status) 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(inStr1); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(4, status); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(-1, status); 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(0, status); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = m1->input().length(); 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(len-1, status); 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
m1->reset(len, status); 82227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 82327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 82427f654740f2a26ad62a5c155af9199af9e69b889claireho 82527f654740f2a26ad62a5c155af9199af9e69b889claireho m1->reset(len+1, status); 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match(pos, status) 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(4, status) == TRUE); 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(); 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(3, status) == FALSE); 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(); 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(5, status) == FALSE); 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(4, status) == TRUE); 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(-1, status) == FALSE); 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match() at end of string should fail, but should not 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be an error. 
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = m1->input().length(); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(len, status) == FALSE); 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match beyond end of string should fail with an error. 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(len+1, status) == FALSE); 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Successful match at end of string. 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("A?", 0, status); // will match zero length string. 
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(inStr1); 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = inStr1.length(); 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.matches(len, status) == TRUE); 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(empty); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.matches(0, status) == TRUE); 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // lookingAt(pos, status) 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); // "not abc" 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = m1->input().length(); 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(len, status) == FALSE); 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE); 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete m1; 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat2; 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Capture Group. 
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::start(); 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::end(); 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::groupCount(); 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("01(23(45)67)(.*)"); 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = "0123456789"; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t matchStarts[] = {0, 2, 4, 8}; 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t matchEnds[] = {10, 8, 6, 10}; 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<4; i++) { 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualStart = matcher->start(i, 
status); 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualStart != matchStarts[i]) { 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d, index %d. Expected %d, got %d\n", 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, i, matchStarts[i], actualStart); 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualEnd = matcher->end(i, status); 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualEnd != matchEnds[i]) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d index %d. Expected %d, got %d\n", 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, i, matchEnds[i], actualEnd); 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start( 0, status), 
U_REGEX_INVALID_STATE); 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->lookingAt(status); 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(status) == "0123456789"); 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(0, status) == "0123456789"); 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(1, status) == "234567" ); 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(2, status) == "45" ); 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(3, status) == "89" ); 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru { 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abc..abc...abc.."; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 6); 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 12); 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find() == FALSE); 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find() == FALSE); 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(0, status)); 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(1, status)); 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(2, status)); 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 6); 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(12, status)); 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 12); 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(13, status) == FALSE); 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(16, status) == FALSE); 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(17, status) == FALSE); 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
status = U_ZERO_ERROR; 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->groupCount() == 0); 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find, with \G in pattern (true if at the end of a previous match). 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV); 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abcabc.abc.."; 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 0); 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(1, status) == -1); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(2, status) == 1); 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 4); 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(1, status) == 4); 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(2, status) == -1); 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find with zero length matches, match position should bump ahead 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to prevent loops. 
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // using an always-true look-ahead. 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s(" "); 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; ; i++) { 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == i); 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==5); 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the bump goes over surrogate pairs OK 1061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004"); 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = s.unescape(); 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
for (i=0; ; i+=2) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == i); 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==10); 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find() loop breaking test. 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // with pattern of /.?/, should see a series of one char matches, then a single 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match of zero length at the end of the input string. 
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".?", 0, status); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s(" "); 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; ; i++) { 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==5); 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matchers with no input string behave as if they had an empty input string. 
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".?", 0, status); 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.find()); 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == 0); 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.input() == ""); 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *p = RegexPattern::compile(".", 0, status); 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m = p->matcher(status); 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->find() == FALSE); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->input() == ""); 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete m; 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete p; 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Regions 
1120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("This is test data"); 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m(".*", testString, 0, status); 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == testString.length()); 1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.region(2,4, status); 1132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.matches(status)); 1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.start(status)==2); 1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.end(status)==4); 1136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(); 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == testString.length()); 1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru 1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString shorterString("short"); 1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(shorterString); 1144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == shorterString.length()); 1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m 
== &m.reset()); 1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // hitEnd() and requireEnd() 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aabb"); 1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m1(".*", testString, 0, status); 1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.lookingAt(status) == TRUE); 1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.hitEnd() == TRUE); 1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.requireEnd() == FALSE); 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 
1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m2("a*", testString, 0, status); 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.lookingAt(status) == TRUE); 1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.hitEnd() == FALSE); 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.requireEnd() == FALSE); 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m3(".*$", testString, 0, status); 1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.lookingAt(status) == TRUE); 1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.hitEnd() == TRUE); 1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.requireEnd() == TRUE); 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compilation error on reset with UChar * 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // These were a hazard that people were stumbling over with runtime errors. 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Changed them to compiler errors by adding private methods that more closely 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // matched the incorrect use of the functions. 
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ucharString[20]; 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".", 0, status); 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(ucharString); // should not compile. 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *p = RegexPattern::compile(".", 0, status); 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m2 = p->matcher(ucharString, status); // should not compile. 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m3(".", ucharString, 0, status); // Should not compile 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Time Outs. 1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note: These tests will need to be changed when the regexp engine is 1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // able to detect and cut short the exponential time behavior on 1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this type of match. 
1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Enough 'a's in the string to cause the match to time out. 1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (Each on additonal 'a' doubles the time) 1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); 1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(a+)+b", testString, 0, status); 1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getTimeLimit() == 0); 1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setTimeLimit(100, status); 1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getTimeLimit() == 100); 1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_TIME_OUT); 1237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Few enough 'a's to slip in under the time limit. 
1241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aaaaaaaaaaaaaaaaaa"); 1242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(a+)+b", testString, 0, status); 1243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setTimeLimit(100, status); 1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Stack Limits 1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A' 1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations 1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the '+', and makes the stack frames larger. 
1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(A)+A$", testString, 0, status); 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With the default stack, this match should fail to run 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With unlimited stack, it should run 1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(0, status); 1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == TRUE); 1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 0); 1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With a limited stack, it the match should fail 1273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(10000, status); 1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 10000); 
1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A pattern that doesn't save state should work with 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a minimal sized stack 1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString = "abc"; 1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("abc", testString, 0, status); 1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(30, status); 1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status) == TRUE); 1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 30); 1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Negative stack sizes should fail 1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(1000, status); 1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(-1, status); 1298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
REGEX_ASSERT(matcher.getStackLimit() == 1000); 1300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// API_Replace API test for class RegexMatcher, testing the 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Replace family of functions. 
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Replace() { 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replace 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abc..abc...abc.."; 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Plain vanilla matches. 
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".yz..abc...abc.."); 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".yz..yz...yz.."); 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Plain vanilla non-matches. 
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d2 = ".abx..abx...abx.."; 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d2); 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".abx..abx...abx.."); 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".abx..abx...abx.."); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Empty source string 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d3 = ""; 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d3); 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ""); 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ""); 
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Empty substitution string 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(data); // ".abc..abc...abc.." 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("", status); 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "...abc...abc.."); 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("", status); 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "........"); 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match whole string 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d4 = "abc"; 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d4); 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("xyz", status); 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "xyz"); 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("xyz", status); 
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "xyz"); 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Capture Group, simple case 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re2("a(..)"); 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status); 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d5 = "abcdefg"; 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher2 = pat2->matcher(d5, status); 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst("$1$1", status); 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "bcbcdefg"); 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status); 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "The value of $1 is bc.defg"); 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst("$ by itself, no group number $$$", status); 
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "$ by itself, no group number $$$defg"); 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF."); 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacement = replacement.unescape(); 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst(replacement, status); 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg"); 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR); 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replacement String with \u hex escapes 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString src = "abc 1 abc 2 abc 3"; 1429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--"); 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(src); 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result = matcher->replaceAll(substitute, status); 
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3"); 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString src = "abc !"; 1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--"); 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(src); 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result = matcher->replaceAll(substitute, status); 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString expected = UnicodeString("--"); 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected.append((UChar32)0x10000); 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected.append("-- !"); 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == expected); 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: need more through testing of capture substitutions. 
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Bug 4057 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin"; 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("ss(.*?)ee", 0, status); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Multiple finds do NOT bump up the previous appendReplacement postion. 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // After a reset into the interior of a string, appendReplacemnt still starts at beginning. 
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(10, status); 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find() at interior of string, appendReplacemnt still starts at beginning. 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(); 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(10, status); 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendTail(result); 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin"); 
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher2; 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat2; 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// API_Pattern Test that the API for class RegexPattern is 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// present and nominally working. 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Pattern() { 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern pata; // Test default constructor to not crash. 
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern patb; 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pata == patb); 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pata == pata); 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re1("abc[a-l][m-z]"); 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re2("def"); 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat1 = RegexPattern::compile(re1, 0, pe, status); 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat2 = RegexPattern::compile(re2, 0, pe, status); 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1 == *pat1); 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1 != pata); 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Assign 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patb = *pat1; 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(patb == *pat1); 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy Construct 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern patc(*pat1); 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(patc == *pat1); 
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(patb == patc); 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1 != pat2); 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patb = *pat2; 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(patb != patc); 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(patb == *pat2); 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile with no flags. 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat1a = RegexPattern::compile(re1, pe, status); 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1a == *pat1); 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1a->flags() == 0); 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile with different flags should be not equal 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status); 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1b != *pat1a); 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE); 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1a->flags() == 0); 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1b; 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // clone 
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat1c = pat1->clone(); 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1c == *pat1); 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(*pat1c != *pat2); 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1c; 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1a; 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat2; 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Verify that a matcher created from a cloned pattern works. 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Jitterbug 3423) 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *pSource = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status); 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pClone = pSource->clone(); 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pSource; 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *mFromClone = pClone->matcher(status); 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = "Hello World"; 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru mFromClone->reset(s); 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(mFromClone->find() == TRUE); 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(mFromClone->group(status) == "Hello"); 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(mFromClone->find() == TRUE); 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(mFromClone->group(status) == "World"); 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(mFromClone->find() == FALSE); 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete mFromClone; 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pClone; 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // matches convenience API 1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE); 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
REGEX_CHECK_STATUS; 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Split() 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat1 = RegexPattern::compile(" +", pe, status); 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fields[10]; 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n; 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("Now is the time", fields, 10, status); 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==4); 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]=="Now"); 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="is"); 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
REGEX_ASSERT(fields[2]=="the"); 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="time"); 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]==""); 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("Now is the time", fields, 2, status); 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==2); 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]=="Now"); 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="is the time"); 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="the"); // left over from previous test 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[1] = "*"; 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("Now is the time", fields, 1, status); 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==1); 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]=="Now is the time"); 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="*"); 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" Now is the time ", fields, 10, status); 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 
1636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==6); 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==""); 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="Now"); 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="is"); 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="the"); 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="time"); 1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[5]==""); 1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" ", fields, 10, status); 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1646b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==2); 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==""); 1648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]==""); 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[0] = "foo"; 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("", fields, 10, status); 1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==0); 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]=="foo"); 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // split, with a pattern with (capture) 
1659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"), pe, status); 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status); 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1665b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==""); 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="Now is "); 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="b"); 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="the time"); 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[5]=="c"); 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[6]==""); 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status==U_ZERO_ERROR); 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status); 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1677b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==" "); 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="Now is "); 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="b"); 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="the time"); 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[5]=="c"); 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[6]==""); 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[6] = "foo"; 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" <a>Now is <b>the time<c>", fields, 6, status); 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==6); 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==" "); 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="Now is "); 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="b"); 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="the time"); 1696b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[5]==""); // All text following "<c>" field delimiter. 
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[6]=="foo"); 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[5] = "foo"; 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status); 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==5); 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==" "); 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="Now is "); 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="b"); 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="the time<c>"); 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[5]=="foo"); 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[5] = "foo"; 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" <a>Now is <b>the time", fields, 5, status); 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==5); 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==" "); 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
REGEX_ASSERT(fields[2]=="Now is "); 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="b"); 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="the time"); 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[5]=="foo"); 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status); 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==4); 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]==" "); 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="a"); 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="Now is "); 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]=="the time<c>"); 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat1 = RegexPattern::compile("([-,])", pe, status); 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n = pat1->split("1-10,20", fields, 10, status); 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(n==5); 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[0]=="1"); 
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[1]=="-"); 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[2]=="10"); 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[3]==","); 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(fields[4]=="20"); 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Test split of string with empty trailing fields 1747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pat1 = RegexPattern::compile(",", pe, status); 1748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 1749b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = pat1->split("a,b,c,", fields, 10, status); 1750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 1751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==4); 1752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[0]=="a"); 1753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]=="b"); 1754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[2]=="c"); 1755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[3]==""); 1756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = pat1->split("a,,,", fields, 10, status); 1758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 1759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==4); 1760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[0]=="a"); 1761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]==""); 1762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[2]==""); 1763b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[3]==""); 
1764b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pat1; 1765b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Split Separator with zero length match. 1767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pat1 = RegexPattern::compile(":?", pe, status); 1768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 1769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = pat1->split("abc", fields, 10, status); 1770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 1771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==5); 1772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[0]==""); 1773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]=="a"); 1774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[2]=="b"); 1775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[3]=="c"); 1776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[4]==""); 1777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pat1; 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexPattern::pattern() 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat1 = new RegexPattern(); 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1->pattern() == ""); 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat1 = RegexPattern::compile("(Hello, world)*", pe, status); 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1->pattern() == "(Hello, world)*"); 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // classID functions 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat1 = RegexPattern::compile("(Hello, world)*", pe, status); 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID()); 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1->getDynamicClassID() != NULL); 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString Hello("Hello, world."); 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m = pat1->matcher(Hello, status); 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID()); 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID()); 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->getDynamicClassID() != NULL); 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete m; 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat1; 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru//--------------------------------------------------------------------------- 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Match_UTF8 Test that the alternate engine for class RegexMatcher 181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// is present and working, but excluding functions 181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// implementing replace operations. 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Match_UTF8() { 181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = 0; 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Debug - slide failing test cases early 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 182550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0 182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Simple pattern compilation 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re = UTEXT_INITIALIZER; 
183627f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 1837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_VERBOSE_TEXT(&re); 183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2; 183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat2 = RegexPattern::compile(&re, flags, pe, status); 184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input1 = UTEXT_INITIALIZER; 184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input2 = UTEXT_INITIALIZER; 184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 184527f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status); 184627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&input1); 184727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); 184827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&input2); 184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&empty, NULL, 0, &status); 185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 185127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */ 185250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t input2Len = strlen("not abc"); 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 185550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 185650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matcher creation and reset. 
185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 1858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); 185950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 186050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 186127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */ 186227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); 186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == FALSE); 186527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */ 186627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText()); 186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input1); 186827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 186950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&empty); 187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == FALSE); 187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_nativeLength(&empty) == 0); 1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // reset(pos, status) 187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
m1->reset(&input1); 187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(4, status); 187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 188027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(-1, status); 188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(0, status); 188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(input1Len-1, status); 189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(input1Len, status); 189627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 189727f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 189827f654740f2a26ad62a5c155af9199af9e69b889claireho 189927f654740f2a26ad62a5c155af9199af9e69b889claireho m1->reset(input1Len+1, status); 190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match(pos, status) 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); 190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(4, status) == TRUE); 190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(); 190950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(3, status) == FALSE); 191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(); 191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(5, status) == FALSE); 191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(4, status) == TRUE); 191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(-1, status) == FALSE); 191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match() at end of string should fail, but should not 191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // be an error. 191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(input2Len, status) == FALSE); 192050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match beyond end of string should fail with an error. 
192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE); 192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Successful match at end of string. 192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m("A?", 0, status); // will match zero length string. 193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&input1); 193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(input1Len, status) == TRUE); 193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&empty); 193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(0, status) == TRUE); 193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // lookingAt(pos, status) 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); // "not abc" 194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); 195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); 195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete m1; 195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 196150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input1); 196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input2); 196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Capture Group. 
197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::start(); 197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::end(); 197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::groupCount(); 197350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 197450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 197927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */ 198027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_01234567_pat, -1, &status); 198150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 198250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 198350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 198627f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 198727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_0123456789, -1, &status); 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 
199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const int32_t matchStarts[] = {0, 2, 4, 8}; 199350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const int32_t matchEnds[] = {10, 8, 6, 10}; 199450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<4; i++) { 199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t actualStart = matcher->start(i, status); 199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualStart != matchStarts[i]) { 199927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("RegexTest failure at %s:%d, index %d. Expected %d, got %d\n", 200027f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, i, matchStarts[i], actualStart); 200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t actualEnd = matcher->end(i, status); 200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualEnd != matchEnds[i]) { 200527f654740f2a26ad62a5c155af9199af9e69b889claireho errln("RegexTest failure at %s:%d index %d. 
Expected %d, got %d\n", 200627f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, i, matchEnds[i], actualEnd); 200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 201150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(); 201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); 2017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 201850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->lookingAt(status); 201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 202050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest; 202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText destText = UTEXT_INITIALIZER; 202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&destText, &dest, &status); 202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 202427f654740f2a26ad62a5c155af9199af9e69b889claireho //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 202527f654740f2a26ad62a5c155af9199af9e69b889claireho // Test shallow-clone API 202627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t group_len; 202727f654740f2a26ad62a5c155af9199af9e69b889claireho result = matcher->group((UText *)NULL, 
group_len, status); 202850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 202927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 203127f654740f2a26ad62a5c155af9199af9e69b889claireho result = matcher->group(0, &destText, group_len, status); 203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 203427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 203527f654740f2a26ad62a5c155af9199af9e69b889claireho // destText is now immutable, reopen it 203627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_close(&destText); 203727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUnicodeString(&destText, &dest, &status); 203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(0, NULL, status); 204050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 204127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(0, &destText, status); 204450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 204627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(1, NULL, status); 204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 205027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_234567[] = { 0x32, 0x33, 0x34, 
0x35, 0x36, 0x37, 0x00 }; /* 234567 */ 205127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(1, &destText, status); 205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 205550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 205627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(2, NULL, status); 205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 206027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ 206127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_45, result); 206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(2, &destText, status); 206450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 206627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_45, result); 206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(3, NULL, status); 206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 207027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ 207127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_89, result); 207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(3, 
&destText, status); 207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 207627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_89, result); 2077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(); 208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 2082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&destText); 208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find 2093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 209927f654740f2a26ad62a5c155af9199af9e69b889claireho const char 
str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 210027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_abc, -1, &status); 2101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 210527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ 210627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abcabcabc, -1, &status); 210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 2108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 211350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 6); 211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 12); 211750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find() == FALSE); 211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find() == FALSE); 2119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
matcher->reset(); 212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 2123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(0, status)); 212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(1, status)); 212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(2, status)); 212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 6); 213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(12, status)); 213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 12); 213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(13, status) == FALSE); 213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(16, status) == FALSE); 213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(17, status) == FALSE); 213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 2136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 2141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->groupCount() == 0); 214350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 214450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 214850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 215350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find, with \G in pattern (true if at the end of a previous match). 2154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 216027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */ 216127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_Gabcabc, -1, &status); 2162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 216450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText 
input = UTEXT_INITIALIZER; 216727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ 216827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abcabcabc, -1, &status); 216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 2170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 217450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 0); 217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(1, status) == -1); 217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(2, status) == 1); 2177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 4); 218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(1, status) == 4); 218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(2, status) == -1); 218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 218350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 218450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 218550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 218850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
2190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find with zero length matches, match position should bump ahead 219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to prevent loops. 2194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 219950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // using an always-true look-ahead. 220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText s = UTEXT_INITIALIZER; 220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, " ", -1, &status); 220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 220750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 220950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == i); 221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(i==5); 221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check that the bump goes over characters outside the BMP OK 221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in 
UTF-8 221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00}; 221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, (char *)aboveBMP, -1, &status); 221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 221827f654740f2a26ad62a5c155af9199af9e69b889claireho for (i=0; ; i+=4) { 221950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 222250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 222350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == i); 2224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 222527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(i==20); 222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&s); 222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find() loop breaking test. 223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // with pattern of /.?/, should see a series of one char matches, then a single 223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match of zero length at the end of the input string. 
223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(".?", 0, status); 223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText s = UTEXT_INITIALIZER; 223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, " ", -1, &status); 223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(i==5); 224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&s); 2250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matchers with no input string behave as if they had an empty input string. 
2255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(".?", 0, status); 226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.find()); 226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == 0); 226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.input() == ""); 2264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 226750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *p = RegexPattern::compile(".", 0, status); 226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *m = p->matcher(status); 226950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m->find() == FALSE); 227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); 227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete m; 227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete p; 2275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regions 227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testPattern = UTEXT_INITIALIZER; 228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testText = UTEXT_INITIALIZER; 228427f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); 228527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testPattern); 228627f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status); 228727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testText); 228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(&testPattern, &testText, 0, status); 229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.region(2,4, status); 229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(status)); 229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status)==2); 230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status)==4); 230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 230250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(); 230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 
230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testText, "short", -1, &status); 230827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testText); 230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&testText); 231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); 231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 
232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 2329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 233150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testText); 233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testPattern); 2337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // hitEnd() and requireEnd() 234150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testPattern = UTEXT_INITIALIZER; 234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testText = UTEXT_INITIALIZER; 234627f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 234727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ 234827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_, -1, &status); 234927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testText, str_aabb, -1, &status); 235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m1(&testPattern, &testText, 0, status); 235250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.lookingAt(status) == TRUE); 235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.hitEnd() == TRUE); 235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.requireEnd() == FALSE); 235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 235827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ 235927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_a, -1, &status); 236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m2(&testPattern, &testText, 0, status); 236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.lookingAt(status) == TRUE); 236250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.hitEnd() == FALSE); 236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.requireEnd() == FALSE); 236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 236727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ 236827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); 236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m3(&testPattern, &testText, 0, status); 237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m3.lookingAt(status) == TRUE); 237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m3.hitEnd() == TRUE); 237250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(m3.requireEnd() == TRUE); 237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testText); 237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testPattern); 2377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 2382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Replace_UTF8 API test for class RegexMatcher, testing the 238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Replace family of functions. 2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 238750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Replace_UTF8() { 238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replace 239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 239627f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 239727f654740f2a26ad62a5c155af9199af9e69b889claireho 
REGEX_VERBOSE_TEXT(&re); 239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240127f654740f2a26ad62a5c155af9199af9e69b889claireho char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ 240250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText dataText = UTEXT_INITIALIZER; 240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, data, -1, &status); 240527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 240627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&dataText); 2407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); 2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain vanilla matches. 
241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 241250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest; 241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText destText = UTEXT_INITIALIZER; 241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&destText, &dest, &status); 241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ 242027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_yz, -1, &status); 242127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&replText); 242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 242427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. 
*/ 242527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 242850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 243027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 243427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */ 243527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 244150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 244227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain vanilla non-matches. 
244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 244727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */ 244827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abxabxabx, -1, &status); 244950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 245050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 245150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 245250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 245327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 245450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 245550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 245650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 245827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 245950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 246050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 246150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 246227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 246550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 246650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
246750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 246827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 246950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Empty source string 247250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, NULL, 0, &status); 247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 247750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 247827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 248327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 248650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 248727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 249227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Empty substitution string 249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." 249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&replText, NULL, 0, &status); 250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 250327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. 
*/ 250427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 250927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 251327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */ 251427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 252027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match whole string 252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 252527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 
252627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc, -1, &status); 252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 252927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */ 253027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_xyz, -1, &status); 253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 253327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 253927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 254327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 
254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 254927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Capture Group, simple case 255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 255427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ 255527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_add, -1, &status); 255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); 255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 255827f654740f2a26ad62a5c155af9199af9e69b889claireho 255927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */ 256027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abcdefg, -1, &status); 2561b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); 256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 256427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ 256527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_11, -1, &status); 256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 256827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 
0x66, 0x67, 0x00 }; /* bcbcdefg */ 256927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 257527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 2576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 2577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. 
*/ 2578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho utext_openUTF8(&replText, str_v, -1, &status); 2579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_VERBOSE_TEXT(&replText); 258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 258227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */ 258327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 258927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 259127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */ 259227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 259527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ 259627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 260227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 260427f654740f2a26ad62a5c155af9199af9e69b889claireho unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. 
*/ 260527f654740f2a26ad62a5c155af9199af9e69b889claireho //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE 260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567890123456 260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[22] = 0xF0; 260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[23] = 0x9D; 260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[24] = 0x9F; 261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[25] = 0x8F; 261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); 261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 261527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */ 261627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 261850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 262227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 
262327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */ 262427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); 262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR); 262627f654740f2a26ad62a5c155af9199af9e69b889claireho// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR); 263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 263127f654740f2a26ad62a5c155af9199af9e69b889claireho// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replacement String with \u hex escapes 263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 263727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */ 263827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */ 
263927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); 264027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_u0043, -1, &status); 264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 264527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ 264627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 265227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 265527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! 
*/ 265627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc, -1, &status); 265727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ 265827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_U00010000, -1, &status); 265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 266127f654740f2a26ad62a5c155af9199af9e69b889claireho unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A" 266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 0123456789 266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[2] = 0xF0; 266450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[3] = 0x90; 266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[4] = 0x80; 266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[5] = 0x80; 266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 267027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 267150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 267350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 267627f654740f2a26ad62a5c155af9199af9e69b889claireho 
REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 267850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: need more through testing of capture substitutions. 2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 268050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Bug 4057 268150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 268250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 268427f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */ 268527f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */ 268627f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ 268727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_ssee, -1, &status); 268827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_blah, -1, &status); 268927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_ooh, -1, &status); 269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(&re, 0, status); 269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 
269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Multiple finds do NOT bump up the previous appendReplacement postion. 269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&dataText); 270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 270427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 270527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText); 2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // After a reset into the interior of a string, appendReplacement still starts at beginning. 
270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.truncate(0); 271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(10, status); 271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 271627f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 271727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText); 2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find() at interior of string, appendReplacement still starts at beginning. 
272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.truncate(0); 272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(); 272450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(10, status); 272550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 272650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 272750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 272827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 272927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); 2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 273127f654740f2a26ad62a5c155af9199af9e69b889claireho m.appendTail(&resultText, status); 273227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ 273327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); 273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 
273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher2; 273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 274150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&dataText); 274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 274550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&destText); 274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Pattern_UTF8 Test that the API for class RegexPattern is 275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// present and nominally working. 275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 275650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Pattern_UTF8() { 275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern pata; // Test default constructor to not crash. 
275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern patb; 275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pata == patb); 276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pata == pata); 276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re1 = UTEXT_INITIALIZER; 276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re2 = UTEXT_INITIALIZER; 276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ 276927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ 277027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re1, str_abcalmz, -1, &status); 277127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re2, str_def, -1, &status); 277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); 277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); 277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1 == *pat1); 277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1 != pata); 277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Assign 278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patb = *pat1; 278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb 
== *pat1); 278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy Construct 278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern patc(*pat1); 278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patc == *pat1); 278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb == patc); 278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1 != pat2); 278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patb = *pat2; 278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb != patc); 279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb == *pat2); 279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile with no flags. 279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1a = RegexPattern::compile(&re1, pe, status); 279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1a == *pat1); 279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1a->flags() == 0); 279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile with different flags should be not equal 279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status); 280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1b != *pat1a); 280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE); 280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1a->flags() == 0); 280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1b; 280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // clone 280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1c = pat1->clone(); 280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1c == *pat1); 281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1c != *pat2); 281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1c; 281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1a; 281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re1); 281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re2); 281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Verify that a matcher created from a cloned pattern works. 
282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Jitterbug 3423) 282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 282827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */ 282927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_pL, -1, &status); 283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); 283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pClone = pSource->clone(); 283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pSource; 283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *mFromClone = pClone->matcher(status); 283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 283827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ 283927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_HelloWorld, -1, &status); 284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mFromClone->reset(&input); 284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == TRUE); 284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->group(status) == "Hello"); 284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == TRUE); 284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->group(status) == 
"World"); 284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == FALSE); 284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete mFromClone; 284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pClone; 284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matches convenience API 285550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 285650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 285950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 286127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ 286227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_randominput, -1, &status); 286350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 286427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 286527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_dotstar, -1, &status); 286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE); 286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 286850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 286927f654740f2a26ad62a5c155af9199af9e69b889claireho const char 
str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 287027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_abc, -1, &status); 287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 287427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ 287527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_nput, -1, &status); 287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 287927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_randominput, -1, &status); 288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); 288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ 288427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_u, -1, &status); 288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abc, -1, &status); 288927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_abc, -1, &status); 
289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Split() 290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 290327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ 290427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re1, str_spaceplus, -1, &status); 290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString fields[10]; 290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t n; 291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 10, status); 291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==4); 291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now"); 291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="is"); 291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(fields[2]=="the"); 291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="time"); 291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]==""); 291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 2, status); 292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==2); 292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now"); 292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="is the time"); 292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="the"); // left over from previous test 292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[1] = "*"; 292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 1, status); 292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==1); 293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now is the time"); 293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="*"); 293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" Now is the time ", fields, 10, status); 293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2937b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==6); 293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="Now"); 
294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="is"); 294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="the"); 294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="time"); 294350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]==""); 2944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[6]==""); 294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[2] = "*"; 294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" ", fields, 10, status); 294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==2); 295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 2951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]==""); 2952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[2]=="*"); 295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[0] = "foo"; 295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("", fields, 10, status); 295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==0); 295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="foo"); 295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // split, with a pattern with (capture) 296327f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status); 296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 
296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 2968b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[6] = fields[7] = "*"; 296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status); 297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="c"); 297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]==""); 2979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[7]=="*"); 298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status==U_ZERO_ERROR); 298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2982b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[6] = fields[7] = "*"; 298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status); 298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2985b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 
298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="c"); 299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]==""); 2993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[7]=="*"); 299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[6] = "foo"; 2997b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = pat1->split(" <a>Now is <b>the time<c> ", fields, 6, status); 299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==6); 300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 3005b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[5]==" "); 300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]=="foo"); 300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[5] = "foo"; 301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status); 301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time<c>"); 301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="foo"); 301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[5] = "foo"; 302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time", fields, 5, status); 302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="foo"); 303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status); 303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==4); 303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 
303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="the time<c>"); 304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 304327f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status); 304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("1-10,20", fields, 10, status); 304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="1"); 305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="-"); 305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="10"); 305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]==","); 305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="20"); 305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexPattern::pattern() and patternText() 305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = new RegexPattern(); 306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1->pattern() == ""); 
306227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status)); 306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 3064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *helloWorldInvariant = "(Hello, world)*"; 3065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status); 306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 3068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*"); 306927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status)); 307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re1); 307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Extended A more thorough check for features of regex patterns 307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The test cases are in a separate data file, 308050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// source/tests/testdata/regextst.txt 308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// A description of the test data format is included in that file. 
308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 308550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst char * 308650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexTest::getPath(char buffer[2048], const char *filename) { 308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *testDataDirectory = IntlTest::getSourceTestData(status); 308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ERROR: loadTestData() failed - %s", u_errorName(status)); 309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strcpy(buffer, testDataDirectory); 309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strcat(buffer, filename); 309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer; 309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Extended() { 310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char tdd[2048]; 310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *srcPath; 310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lineNum = 0; 310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open and read the test data file. 
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcPath=getPath(tdd, "regextst.txt"); 310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcPath==NULL) { 311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len; 311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status); 311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the test data into a UnicodeString 312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testString(FALSE, testData, len); 312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status); 312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status); 3126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMQvabtyYzZ2-9]*)([:letter:]*)"), 0, status); 312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status); 312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testPattern; // The 
pattern for test from the test file. 313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testFlags; // the flags for a test. 313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString matchString; // The marked up string to be used as input 313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)){ 313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Construct RegexMatcher() error."); 313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 313950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop over the test data file, once per line. 314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (lineMat.find()) { 314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum++; 314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 314527f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status)); 314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testLine = lineMat.group(1, status); 315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (testLine.length() == 0) { 315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Parse the test line. 
Skip blank and comment only lines. 315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Separate out the three main fields - pattern, flags, target. 315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho commentMat.reset(testLine); 316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (commentMat.lookingAt(status)) { 316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This line is a comment, or blank. 316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the pattern field, remove it from the test file line. 316750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quotedStuffMat.reset(testLine); 316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (quotedStuffMat.lookingAt(status)) { 317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testPattern = quotedStuffMat.group(2, status); 317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, quotedStuffMat.end(0, status)); 317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 317327f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum); 317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the flags from the test file line. 
318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagsMat.reset(testLine); 318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagsMat.lookingAt(status); // Will always match, possibly an empty string. 318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testFlags = flagsMat.group(1, status); 318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagsMat.group(2, status).length() > 0) { 318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Bad Match flag at line %d. Scanning %c\n", 318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum, flagsMat.group(2, status).charAt(0)); 318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, flagsMat.end(0, status)); 319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the match string, as a whole. 319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We'll process the <tags> later. 
319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quotedStuffMat.reset(testLine); 319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (quotedStuffMat.lookingAt(status)) { 319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString = quotedStuffMat.group(2, status); 319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, quotedStuffMat.end(0, status)); 319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Bad match string at test file line %d", lineNum); 320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The only thing left from the input line should be an optional trailing comment. 320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho commentMat.reset(testLine); 320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (commentMat.lookingAt(status) == FALSE) { 320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: unexpected characters at end of test line.", lineNum); 321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Run the test 321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 321627f654740f2a26ad62a5c155af9199af9e69b889claireho regex_find(testPattern, testFlags, matchString, srcPath, lineNum); 321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 
322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 322750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regex_find(pattern, flags, inputString, lineNumber) 322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Function to run a single test from the Extended (data driven) tests. 323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// See file test/testdata/regextst.txt for a description of the 323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pattern and inputString fields, and the allowed flags. 323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// lineNumber is the source line in regextst.txt of the test. 323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 323450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set a value into a UVector at position specified by a decimal number in 323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// a UnicodeString. This is a utility function needed by the actual test function, 323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// which follows. 
324050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void set(UVector &vec, int32_t val, UnicodeString index) { 324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t idx = 0; 324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (int32_t i=0; i<index.length(); i++) { 324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t d=u_charDigitValue(index.charAt(i)); 324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (d<0) {return;} 324650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho idx = idx*10 + d; 324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (vec.size()<idx+1) {vec.addElement(-1, status);} 324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho vec.setElementAt(val, idx); 325050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 325227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void setInt(UVector &vec, int32_t val, int32_t idx) { 325327f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status=U_ZERO_ERROR; 325427f654740f2a26ad62a5c155af9199af9e69b889claireho while (vec.size()<idx+1) {vec.addElement(-1, status);} 325527f654740f2a26ad62a5c155af9199af9e69b889claireho vec.setElementAt(val, idx); 325627f654740f2a26ad62a5c155af9199af9e69b889claireho} 325727f654740f2a26ad62a5c155af9199af9e69b889claireho 325827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex) 325927f654740f2a26ad62a5c155af9199af9e69b889claireho{ 326027f654740f2a26ad62a5c155af9199af9e69b889claireho UBool couldFind = TRUE; 326127f654740f2a26ad62a5c155af9199af9e69b889claireho UTEXT_SETNATIVEINDEX(utext, 0); 326227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i = 0; 326327f654740f2a26ad62a5c155af9199af9e69b889claireho while (i < unistrOffset) { 
326427f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c = UTEXT_NEXT32(utext); 326527f654740f2a26ad62a5c155af9199af9e69b889claireho if (c != U_SENTINEL) { 326627f654740f2a26ad62a5c155af9199af9e69b889claireho i += U16_LENGTH(c); 326727f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 326827f654740f2a26ad62a5c155af9199af9e69b889claireho couldFind = FALSE; 326927f654740f2a26ad62a5c155af9199af9e69b889claireho break; 327027f654740f2a26ad62a5c155af9199af9e69b889claireho } 327127f654740f2a26ad62a5c155af9199af9e69b889claireho } 3272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext); 327327f654740f2a26ad62a5c155af9199af9e69b889claireho return couldFind; 327427f654740f2a26ad62a5c155af9199af9e69b889claireho} 327527f654740f2a26ad62a5c155af9199af9e69b889claireho 327627f654740f2a26ad62a5c155af9199af9e69b889claireho 327750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::regex_find(const UnicodeString &pattern, 327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &flags, 327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &inputString, 328027f654740f2a26ad62a5c155af9199af9e69b889claireho const char *srcPath, 328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t line) { 328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unEscapedInput; 328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString deTaggedInput; 328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternUTF8Length, inputUTF8Length; 328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *patternChars = NULL, *inputChars = NULL; 328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UConverter *UTF8Converter = NULL; 
329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *parsePat = NULL; 329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *parseMatcher = NULL; 329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; 329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; 329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector groupStarts(status); 329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector groupEnds(status); 329927f654740f2a26ad62a5c155af9199af9e69b889claireho UVector groupStartsUTF8(status); 330027f654740f2a26ad62a5c155af9199af9e69b889claireho UVector groupEndsUTF8(status); 330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isMatch = FALSE, isUTF8Match = FALSE; 330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool failed = FALSE; 330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numFinds; 330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool useMatchesFunc = FALSE; 330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool useLookingAtFunc = FALSE; 330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t regionStart = -1; 330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t regionEnd = -1; 330927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t regionStartUTF8 = -1; 331027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t regionEndUTF8 = -1; 331127f654740f2a26ad62a5c155af9199af9e69b889claireho 331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile the caller's pattern 
331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t bflags = 0; 331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag 331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_CASE_INSENSITIVE; 331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag 332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_COMMENTS; 332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag 332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_DOTALL; 332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag 332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_MULTILINE; 332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag 333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; 333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag 333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_UNIX_LINES; 333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag 3337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius bflags |= UREGEX_LITERAL; 3338103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callerPattern = RegexPattern::compile(pattern, bflags, pe, status); 334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #if UCONFIG_NO_BREAK_ITERATION==1 334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'v' test flag means that the test pattern should not compile if ICU was configured 334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to not include break iteration. RBBI is needed for Unicode word boundaries. 334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected pattern compilation error. 335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("Pattern Compile returns \"%s\"", u_errorName(status)); 335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unexpected pattern compilation error. 
3358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status)); 335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Converter = ucnv_open("UTF8", &status); 336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); 336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = new char[patternUTF8Length+1]; 336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); 337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); 337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_ZERO_ERROR) { 337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); 337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==1 337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'v' test flag means that the test pattern should not compile if ICU was configured 337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to not include break iteration. RBBI is needed for Unicode word boundaries. 
337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected pattern compilation error. 338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status)); 338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unexpected pattern compilation error. 339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: error %s compiling pattern. 
(UTF8)", line, u_errorName(status)); 339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Pattern == NULL) { 339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 339927f654740f2a26ad62a5c155af9199af9e69b889claireho logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); 340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag 340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPatternDump(callerPattern); 340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag 340827f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line); 340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Number of times find() should be called on the test string, default to 1 341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numFinds = 1; 
341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=2; i<=9; i++) { 341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag 341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (numFinds != 1) { 342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: more than one digit flag. Scanning %d.", line, i); 342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numFinds = i; 342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'M' flag. Use matches() instead of find() 342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x4d) >= 0) { 342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho useMatchesFunc = TRUE; 343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x4c) >= 0) { 343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho useLookingAtFunc = TRUE; 343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Find the tags in the input data, remove them, and record the group boundary 343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // positions. 
343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status); 344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unEscapedInput = inputString.unescape(); 344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher = parsePat->matcher(unEscapedInput, status); 344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(parseMatcher->find()) { 344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher->appendReplacement(deTaggedInput, "", status); 344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString groupNum = parseMatcher->group(2, status); 344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum == "r") { 345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // <r> or </r>, a region specification within the string 345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (parseMatcher->group(1, status) == "/") { 345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regionEnd = deTaggedInput.length(); 345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regionStart = deTaggedInput.length(); 345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // <digits> or </digits>, a group match boundary tag. 
345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (parseMatcher->group(1, status) == "/") { 345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho set(groupEnds, deTaggedInput.length(), groupNum); 346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho set(groupStarts, deTaggedInput.length(), groupNum); 346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher->appendTail(deTaggedInput); 346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line); 346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) { 346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("mismatched <r> tags"); 346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Configure the matcher according to the flags specified with this test. 
347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher = callerPattern->matcher(deTaggedInput, status); 347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag 347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->setTrace(TRUE); 348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Pattern != NULL) { 348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); 348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputChars = new char[inputUTF8Length+1]; 348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status); 348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); 348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_ZERO_ERROR) { 3490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); 349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher == NULL) { 349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 349627f654740f2a26ad62a5c155af9199af9e69b889claireho logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for 
%s:%d", srcPath, line); 349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 350127f654740f2a26ad62a5c155af9199af9e69b889claireho // 350227f654740f2a26ad62a5c155af9199af9e69b889claireho // Generate native indices for UTF8 versions of region and capture group info 350327f654740f2a26ad62a5c155af9199af9e69b889claireho // 350427f654740f2a26ad62a5c155af9199af9e69b889claireho if (UTF8Matcher != NULL) { 350527f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8); 350627f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8); 350727f654740f2a26ad62a5c155af9199af9e69b889claireho 350827f654740f2a26ad62a5c155af9199af9e69b889claireho // Fill out the native index UVector info. 350927f654740f2a26ad62a5c155af9199af9e69b889claireho // Only need 1 loop, from above we know groupStarts.size() = groupEnds.size() 351027f654740f2a26ad62a5c155af9199af9e69b889claireho for (i=0; i<groupStarts.size(); i++) { 351127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t start = groupStarts.elementAti(i); 351227f654740f2a26ad62a5c155af9199af9e69b889claireho // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 351327f654740f2a26ad62a5c155af9199af9e69b889claireho if (start >= 0) { 351427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t startUTF8; 351527f654740f2a26ad62a5c155af9199af9e69b889claireho if (!utextOffsetToNative(&inputText, start, startUTF8)) { 351627f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Error at line %d: could not find native index for group start %d. 
UTF16 index %d", line, i, start); 351727f654740f2a26ad62a5c155af9199af9e69b889claireho failed = TRUE; 351827f654740f2a26ad62a5c155af9199af9e69b889claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 351927f654740f2a26ad62a5c155af9199af9e69b889claireho } 352027f654740f2a26ad62a5c155af9199af9e69b889claireho setInt(groupStartsUTF8, startUTF8, i); 352127f654740f2a26ad62a5c155af9199af9e69b889claireho } 352227f654740f2a26ad62a5c155af9199af9e69b889claireho 352327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t end = groupEnds.elementAti(i); 352427f654740f2a26ad62a5c155af9199af9e69b889claireho // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 352527f654740f2a26ad62a5c155af9199af9e69b889claireho if (end >= 0) { 352627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t endUTF8; 352727f654740f2a26ad62a5c155af9199af9e69b889claireho if (!utextOffsetToNative(&inputText, end, endUTF8)) { 352827f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Error at line %d: could not find native index for group end %d. UTF16 index %d", line, i, end); 352927f654740f2a26ad62a5c155af9199af9e69b889claireho failed = TRUE; 353027f654740f2a26ad62a5c155af9199af9e69b889claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
353127f654740f2a26ad62a5c155af9199af9e69b889claireho } 353227f654740f2a26ad62a5c155af9199af9e69b889claireho setInt(groupEndsUTF8, endUTF8, i); 353327f654740f2a26ad62a5c155af9199af9e69b889claireho } 353427f654740f2a26ad62a5c155af9199af9e69b889claireho } 353527f654740f2a26ad62a5c155af9199af9e69b889claireho } 353627f654740f2a26ad62a5c155af9199af9e69b889claireho 353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regionStart>=0) { 353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->region(regionStart, regionEnd, status); 353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 354127f654740f2a26ad62a5c155af9199af9e69b889claireho UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status); 354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x61) >= 0) { // 'a' anchoring bounds flag 354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->useAnchoringBounds(FALSE); 354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->useAnchoringBounds(FALSE); 354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag 355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->useTransparentBounds(TRUE); 355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->useTransparentBounds(TRUE); 355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Do a find on the de-tagged input using the caller's pattern 356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: error on count>1 and not find(). 356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // error on both matches() and lookingAt(). 356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<numFinds; i++) { 356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (useMatchesFunc) { 356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->matches(status); 356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->matches(status); 357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (useLookingAtFunc) { 357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->lookingAt(status); 357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->lookingAt(status); 357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->find(); 357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->find(); 358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
matcher->setTrace(FALSE); 358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match up the groups from the find() with the groups from the tags 358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // number of tags should match number of groups from find operation. 359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher->groupCount does not include group 0, the entire match, hence the +1. 359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // G option in test means that capture group data is not available in the 359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // expected results, so the check needs to be suppressed. 359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch == FALSE && groupStarts.size() != 0) { 3594b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dataerrln("Error at line %d: Match expected, but none found.", line); 359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) { 359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Match expected, but none found. (UTF8)", line); 359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x47 /*G*/) >= 0) { 360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only check for match / no match. Don't check capture groups. 
360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch && groupStarts.size() == 0) { 360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: No match expected, but one found.", line); 360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) { 360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: No match expected, but one found. (UTF8)", line); 361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=matcher->groupCount(); i++) { 361750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i)); 361827f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i)); 361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matcher->start(i, status) != expectedStart) { 362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d", 362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, i, expectedStart, matcher->start(i, status)); 362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
362427f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) { 362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d (UTF8)", 362627f654740f2a26ad62a5c155af9199af9e69b889claireho line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); 362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 363050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i)); 363227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i)); 363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matcher->end(i, status) != expectedEnd) { 363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect end position for group %d. Expected %d, got %d", 363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, i, expectedEnd, matcher->end(i, status)); 363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Error on end position; keep going; real error is probably yet to come as group 363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end positions work from end of the input data towards the front. 363927f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) { 364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect end position for group %d. 
Expected %d, got %d (UTF8)", 364127f654740f2a26ad62a5c155af9199af9e69b889claireho line, i, expectedEndUTF8, UTF8Matcher->end(i, status)); 364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Error on end position; keep going; real error is probably yet to come as group 364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end positions work from end of the input data towards the front. 364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( matcher->groupCount()+1 < groupStarts.size()) { 364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Expected %d capture groups, found %d.", 364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, groupStarts.size()-1, matcher->groupCount()); 365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) { 365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Expected %d capture groups, found %d. (UTF8)", 365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, groupStarts.size()-1, UTF8Matcher->groupCount()); 365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->requireEnd() == TRUE) { 366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned TRUE. 
Expected FALSE", line); 366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->requireEnd() == TRUE) { 366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line); 366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true 366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->requireEnd() == FALSE) { 367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", line); 367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false 367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->requireEnd() == FALSE) { 367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line); 367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->hitEnd() == TRUE) { 368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned TRUE. 
Expected FALSE", line); 368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->hitEnd() == TRUE) { 368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)", line); 368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->hitEnd() == FALSE) { 369050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line); 369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->hitEnd() == FALSE) { 369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned FALSE. 
Expected TRUE (UTF8)", line); 369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 369950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanupAndReturn: 370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (failed) { 370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " 370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); 370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // callerPattern->dump(); 370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete parseMatcher; 370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete parsePat; 370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete UTF8Matcher; 370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete UTF8Pattern; 370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete callerPattern; 371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] inputChars; 371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] patternChars; 371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_close(UTF8Converter); 371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 371850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Errors Check for error handling in patterns. 372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 372750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Errors() { 372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // \escape sequences that aren't implemented yet. 372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED); 373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Missing close parentheses 373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN); 373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN); 373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN); 373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Extra close paren 373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN); 373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN); 373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN); 374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-ahead, Look-behind 374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: add tests for 
unbounded length look-behinds. 374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX); // illegal construct 374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Attempt to use non-default flags 374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = UREGEX_CANON_EQ | 375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_COMMENTS | UREGEX_DOTALL | 375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_MULTILINE; 375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status); 375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED); 375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Quantifiers are allowed only after something that can be quantified. 
375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX); 376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX); 376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX); 376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Mal-formed {min,max} quantifiers 376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL); 376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN); 376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL); 376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL); 376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL); 376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG); 377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG); // Overflows int during scan 377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Overflows regex binary format 377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG); 377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Ticket 5389 377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); 377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Invalid Back Reference \0 377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For ICU 3.8 and earlier 377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For ICU 
versions newer than 3.8, \0 introduces an octal escape. 378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); 378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Read a text data file, convert it to UChars, and return the data 378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in one big UChar * buffer, which the caller must delete. 379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 379250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, 379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *defEncoding, UErrorCode &status) { 379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *retPtr = NULL; 379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *fileBuf = NULL; 379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UConverter* conv = NULL; 379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FILE *f = NULL; 379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = 0; 380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retPtr; 380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open the file. 380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho f = fopen(fileName, "rb"); 380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (f == 0) { 380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error opening test data file %s\n", fileName); 381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_FILE_ACCESS_ERROR; 381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Read it in 381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fileSize; 381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t amt_read; 381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fseek( f, 0, SEEK_END); 382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize = ftell(f); 382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBuf = new char[fileSize]; 382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fseek(f, 0, SEEK_SET); 382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho amt_read = fread(fileBuf, 1, fileSize, f); 382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (amt_read != fileSize || fileSize <= 0) { 382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error reading test data file."); 382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanUpAndReturn; 382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look for a Unicode Signature (BOM) on the data just read 383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t signatureLength; 383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char * fileBufC; 383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char* encoding; 383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC = fileBuf; 383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho encoding = ucnv_detectUnicodeSignature( 383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBuf, fileSize, &signatureLength, &status); 383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(encoding!=NULL ){ 384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC += signatureLength; 384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize -= signatureLength; 384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho encoding = defEncoding; 384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (strcmp(encoding, "utf-8") == 0) { 384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("file %s is missing its BOM", fileName); 384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open a converter to take the rule file to UTF-16 385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho conv = ucnv_open(encoding, &status); 385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanUpAndReturn; 385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Convert the rules to UChar. 
385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Preflight first to determine required buffer size. 386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = ucnv_toUChars(conv, 386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho NULL, // dest, 386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0, // destCapacity, 386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC, 386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize, 386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status); 386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Buffer Overflow is expected from the preflight operation. 386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 387150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr = new UChar[ulen+1]; 387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_toUChars(conv, 387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr, // dest, 387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen+1, 387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC, 387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize, 387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status); 387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 388050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanUpAndReturn: 388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fclose(f); 388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] fileBuf; 388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_close(conv); 388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 
3886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete []retPtr; 388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr = 0; 388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = 0; 388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retPtr; 389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PerlTests - Run Perl's regular expression tests 389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The input file for this test is re_tests, the standard regular 389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// expression test data distributed with the Perl source code. 389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Here is Perl's description of the test data file: 390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # The tests are in a separate file 't/op/re_tests'. 390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Each line in that file is a separate test. 390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # There are five columns, separated by tabs. 390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 1 contains the pattern, optionally enclosed in C<''>. 390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Modifiers can be put after the closing C<'>. 390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 2 contains the string to be matched. 
391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 3 contains the expected result: 391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # y expect a match 391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # n expect no match 391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # c expect an error 391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # B test exposes a known bug in Perl, should be skipped 391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # b test exposes a known bug in Perl, should be skipped if noamp 391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>. 391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 4 contains a string, usually C<$&>. 392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 5 contains the expected result of double-quote 392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # interpolating that string after the match, or start of error message. 392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 6, if present, contains a reason why the test is skipped. 392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # This is printed with "skipped", for harness to pick up. 392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # \n in the tests are interpolated, as are variables of the form ${\w+}. 
392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # If you want to add a regular expression test that can't be expressed 393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # in this format, don't add it here: put it in op/pat.t instead. 393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// For ICU, if field 3 contains an 'i', the test will be skipped. 393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The test exposes is some known incompatibility between ICU and Perl regexps. 393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (The i is in addition to whatever was there before.) 393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 393850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTests() { 393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char tdd[2048]; 394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *srcPath; 394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open and read the test data file. 
394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcPath=getPath(tdd, "re_tests.txt"); 394850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcPath==NULL) { 394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len; 395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the test data into a UnicodeString 396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testDataString(FALSE, testData, len); 396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to break the input file into lines, and strip the new lines. 396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // One line per match, capture group one is the desired data. 
396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("RegexPattern::compile() error"); 397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher* lineMat = linePat->matcher(testDataString, status); 397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to split a test file line into fields. 397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There are six fields, separated by tabs. 397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to identify test patterns with flag settings, and to separate them. 398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test patterns with flags look like 'pattern'i 398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test patterns without flags are not quoted: pattern 398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher* flagMat = flagPat->matcher(status); 398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The Perl tests reference several perl-isms, which are evaluated/substituted 399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in the test data. Not being perl, this must be done explicitly. Here 399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // are string constants and REs for these constructs. 399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString nulnulSrc("${nulnul}"); 399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nulnul = nulnul.unescape(); 399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString ffffSrc("${ffff}"); 399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString ffff("\\uffff", -1, US_INV); 400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ffff = ffff.unescape(); 400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // regexp for $-[0], $+[2], etc. 400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *groupsMat = groupsPat->matcher(status); 400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // regexp for $0, $1, $2, etc. 
400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *cgMat = cgPat->matcher(status); 400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Main Loop for the Perl Tests, runs once per line from the 401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // test data file. 401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lineNum = 0; 401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t skippedUnimplementedCount = 0; 401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (lineMat->find()) { 401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum++; 401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Get a line, break it into its fields, do the Perl 402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // variable substitutions. 
402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString line = lineMat->group(1, status); 402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString fields[7]; 402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fieldPat->split(line, fields, 7, status); 402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagMat->reset(fields[0]); 402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagMat->matches(status); 403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern = flagMat->group(2, status); 403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace("${bang}", "!"); 403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace(ffffSrc, ffff); 403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Identify patterns that include match flag settings, 403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // split off the flags, remove the extra quotes. 403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString flagStr = flagMat->group(3, status); 404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = 0; 404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_c = 0x63; // Char constants for the flag letters. 
404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_m = 0x6d; 404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_x = 0x78; 404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_y = 0x79; 405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_i) != -1) { 405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_CASE_INSENSITIVE; 405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_m) != -1) { 405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_MULTILINE; 405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_x) != -1) { 405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_COMMENTS; 405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile the test pattern. 406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status); 406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_REGEX_UNIMPLEMENTED) { 406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test of a feature that is planned for ICU, but not yet implemented. 406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip the test. 
406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho skippedUnimplementedCount++; 407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Some tests are supposed to generate errors. 407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only report an error for tests that are supposed to succeed. 407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_i) >= 0) { 408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ICU should skip this test. 
409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_c) >= 0) { 409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This pattern should have caused a compilation error, but didn't/ 409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: Expected a pattern compile error, got success.", lineNum); 409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // replace the Perl variables that appear in some of the 410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match data strings. 410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString matchString = fields[1]; 410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(nulnulSrc, nulnul); 410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(ffffSrc, ffff); 410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replace any \n in the match string with an actual new-line char. 411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Don't do full unescape, as this unescapes more than Perl does, which 411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // causes other spurious failures in the tests. 
411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Run the test, check for expected match/don't match result. 411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *testMat = testPat->matcher(matchString, status); 412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool found = testMat->find(); 412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool expected = FALSE; 412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_y) >=0) { 412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected = TRUE; 412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (expected != found) { 412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: Expected %smatch, got %smatch", 412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum, expected?"":"no ", found?"":"no " ); 412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Don't try to check expected results if there is no match. 
413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Some have stuff in the expected fields) 413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!found) { 413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testMat; 413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Interpret the Perl expression from the fourth field of the data file, 414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // building up an ICU string from the results of the ICU match. 414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The Perl expression will contain references to the results of 414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a regex match, including the matched string, capture group strings, 414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // group starting and ending indicies, etc. 414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString perlExpr = fields[3]; 414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if SUPPORT_MUTATING_INPUT_STRING 414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (perlExpr.length() > 0) { 415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !SUPPORT_MUTATING_INPUT_STRING 415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Perferred usage. Reset after any modification to input string. 
415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (perlExpr.startsWith("$&")) { 416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(testMat->group(status)); 416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (groupsMat->lookingAt(status)) { 416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // $-[0] $+[2] etc. 416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString digitString = groupsMat->group(2, status); 416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t t = 0; 416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString plusOrMinus = groupsMat->group(1, status); 417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t matchPosition; 417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (plusOrMinus.compare("+") == 0) { 417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchPosition = testMat->end(groupNum, status); 417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchPosition = testMat->start(groupNum, status); 417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matchPosition != -1) { 417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, matchPosition); 417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
perlExpr.remove(0, groupsMat->end(status)); 418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (cgMat->lookingAt(status)) { 418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // $1, $2, $3, etc. 418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString digitString = cgMat->group(1, status); 418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t t = 0; 418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(testMat->group(groupNum, status)); 419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, cgMat->end(status)); 419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (perlExpr.startsWith("@-")) { 419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=testMat->groupCount(); i++) { 419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>0) { 419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(" "); 420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if 
(perlExpr.startsWith("@+")) { 420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=testMat->groupCount(); i++) { 420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>0) { 421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(" "); 421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or as an escaped sequence (e.g. \n) 421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (perlExpr.length() > 1) { 422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c = perlExpr.charAt(0); 422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (c) { 422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 'n': c = '\n'; break; 422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // add any other escape sequences that show up in the test expected results. 
422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(c); 422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); 422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Any characters from the perl expression that we don't explicitly 423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // recognize before here are assumed to be literals and copied 423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // as-is to the expected results. 423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(perlExpr.charAt(0)); 423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); 423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 424350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected Results Compare 424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString expectedS(fields[4]); 424950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(nulnulSrc, nulnul); 425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(ffffSrc, ffff); 425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 
4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 425450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (expectedS.compare(resultString) != 0) { 425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho err("Line %d: Incorrect perl expression results.", lineNum); 425650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testMat; 426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 4261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // All done. Clean up allocated stuff. 
4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete cgMat; 426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete cgPat; 4268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete groupsMat; 427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete groupsPat; 4271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete flagMat; 427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete flagPat; 4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete lineMat; 427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete linePat; 4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fieldPat; 427950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------- 4288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 428950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PerlTestsUTF8 Run Perl's regular expression tests on UTF-8-based UTexts 429050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (instead of using UnicodeStrings) to test the alternate engine. 
429150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The input file for this test is re_tests, the standard regular 429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// expression test data distributed with the Perl source code. 429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// See PerlTests() for more information. 4294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------- 429650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTestsUTF8() { 4297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char tdd[2048]; 4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *srcPath; 4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status)); 430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *patternChars = NULL; 430450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternLength; 430550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternCapacity = 0; 430650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *inputChars = NULL; 430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputLength; 430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputCapacity = 0; 431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 
4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open and read the test data file. 4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcPath=getPath(tdd, "re_tests.txt"); 4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcPath==NULL) { 4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len; 4322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Put the test data into a UnicodeString 4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testDataString(FALSE, testData, len); 4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to break the input file into lines, and strip the new lines. 
4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One line per match, capture group one is the desired data. 4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dataerrln("RegexPattern::compile() error"); 4339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher* lineMat = linePat->matcher(testDataString, status); 4342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to split a test file line into fields. 4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There are six fields, separated by tabs. 4346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to identify test patterns with flag settings, and to separate them. 
4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test patterns with flags look like 'pattern'i 4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test patterns without flags are not quoted: pattern 4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 4356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher* flagMat = flagPat->matcher(status); 4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The Perl tests reference several perl-isms, which are evaluated/substituted 4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the test data. Not being perl, this must be done explicitly. Here 4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are string constants and REs for these constructs. 
4362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString nulnulSrc("${nulnul}"); 4364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 4365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nulnul = nulnul.unescape(); 4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ffffSrc("${ffff}"); 4368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString ffff("\\uffff", -1, US_INV); 4369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ffff = ffff.unescape(); 4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // regexp for $-[0], $+[2], etc. 4372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *groupsMat = groupsPat->matcher(status); 4374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // regexp for $0, $1, $2, etc. 
4376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 4377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *cgMat = cgPat->matcher(status); 4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main Loop for the Perl Tests, runs once per line from the 4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test data file. 4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lineNum = 0; 4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t skippedUnimplementedCount = 0; 4386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (lineMat->find()) { 4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum++; 4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get a line, break it into its fields, do the Perl 4391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // variable substitutions. 
4392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString line = lineMat->group(1, status); 4394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fields[7]; 4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fieldPat->split(line, fields, 7, status); 4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flagMat->reset(fields[0]); 4398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flagMat->matches(status); 4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern = flagMat->group(2, status); 4400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.findAndReplace("${bang}", "!"); 4401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.findAndReplace(ffffSrc, ffff); 4403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Identify patterns that include match flag settings, 4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // split off the flags, remove the extra quotes. 
4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString flagStr = flagMat->group(3, status); 4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 4411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags = 0; 4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_c = 0x63; // Char constants for the flag letters. 4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_m = 0x6d; 4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_x = 0x78; 4418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_y = 0x79; 4419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_i) != -1) { 4420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_CASE_INSENSITIVE; 4421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_m) != -1) { 4423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_MULTILINE; 4424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_x) != -1) { 4426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_COMMENTS; 4427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 443050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the pattern in a UTF-8 UText 443150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 443250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 443350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 443450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 443550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] patternChars; 443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternCapacity = patternLength + 1; 443850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = new char[patternCapacity]; 443950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 444050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 444150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&patternText, patternChars, patternLength, &status); 4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile the test pattern. 4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 444650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status); 4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status == U_REGEX_UNIMPLEMENTED) { 4448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test of a feature that is planned for ICU, but not yet implemented. 4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip the test. 
4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru skippedUnimplementedCount++; 4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Some tests are supposed to generate errors. 4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Only report an error for tests that are supposed to succeed. 4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_i) >= 0) { 4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ICU should skip this test. 
4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_c) >= 0) { 4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This pattern should have caused a compilation error, but didn't/ 4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: Expected a pattern compile error, got success.", lineNum); 4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 448350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // replace the Perl variables that appear in some of the 4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match data strings. 4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString matchString = fields[1]; 4489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchString.findAndReplace(nulnulSrc, nulnul); 4490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchString.findAndReplace(ffffSrc, ffff); 4491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replace any \n in the match string with an actual new-line char. 
4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Don't do full unescape, as this unescapes more than Perl does, which 4494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // causes other spurious failures in the tests. 4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the input in a UTF-8 UText 449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 450150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 450250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 450350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 450450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] inputChars; 450550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputCapacity = inputLength + 1; 450650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputChars = new char[inputCapacity]; 450750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 450850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 450950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, inputChars, inputLength, &status); 4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the test, check for expected match/don't match result. 
4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText); 4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool found = testMat->find(); 4516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool expected = FALSE; 4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_y) >=0) { 4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected = TRUE; 4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expected != found) { 4521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: Expected %smatch, got %smatch", 4522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum, expected?"":"no ", found?"":"no " ); 4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Don't try to check expected results if there is no match. 
4527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (Some have stuff in the expected fields) 4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!found) { 4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete testMat; 4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete testPat; 4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Interpret the Perl expression from the fourth field of the data file, 4536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // building up an ICU string from the results of the ICU match. 4537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The Perl expression will contain references to the results of 4538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a regex match, including the matched string, capture group strings, 4539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // group starting and ending indicies, etc. 
4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString resultString; 4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString perlExpr = fields[3]; 4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (perlExpr.length() > 0) { 454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 454750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (perlExpr.startsWith("$&")) { 4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(testMat->group(status)); 4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (groupsMat->lookingAt(status)) { 4554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // $-[0] $+[2] etc. 
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString digitString = groupsMat->group(2, status); 4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t = 0; 4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 4558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString plusOrMinus = groupsMat->group(1, status); 4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchPosition; 4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (plusOrMinus.compare("+") == 0) { 4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchPosition = testMat->end(groupNum, status); 4562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchPosition = testMat->start(groupNum, status); 4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchPosition != -1) { 4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, matchPosition); 4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, groupsMat->end(status)); 4569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (cgMat->lookingAt(status)) { 4572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // $1, $2, $3, etc. 
4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString digitString = cgMat->group(1, status); 4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t = 0; 4575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 4576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 4577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(testMat->group(groupNum, status)); 4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, cgMat->end(status)); 4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (perlExpr.startsWith("@-")) { 4584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 4585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testMat->groupCount(); i++) { 4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i>0) { 4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(" "); 4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (perlExpr.startsWith("@+")) { 
4595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testMat->groupCount(); i++) { 4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i>0) { 4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(" "); 4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or as an escaped sequence (e.g. \n) 4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (perlExpr.length() > 1) { 4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = perlExpr.charAt(0); 4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (c) { 4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'n': c = '\n'; break; 4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // add any other escape sequences that show up in the test expected results. 
4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(c); 4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); 4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Any characters from the perl expression that we don't explicitly 4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // recognize before here are assumed to be literals and copied 4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // as-is to the expected results. 4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(perlExpr.charAt(0)); 4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); 4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Expected Results Compare 4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString expectedS(fields[4]); 4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
expectedS.findAndReplace(nulnulSrc, nulnul); 4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedS.findAndReplace(ffffSrc, ffff); 4639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedS.compare(resultString) != 0) { 4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err("Line %d: Incorrect perl expression results.", lineNum); 464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testMat; 4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All done. Clean up allocated stuff. 
4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cgMat; 4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cgPat; 4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete groupsMat; 4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete groupsPat; 4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete flagMat; 4661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete flagPat; 4662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete lineMat; 4664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete linePat; 4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fieldPat; 4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] testData; 466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 466950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 467050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 467150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 467250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] patternChars; 467350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] inputChars; 4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//-------------------------------------------------------------- 4682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 4683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Bug6149 Verify limits to heap expansion for backtrack stack. 4684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Use this pattern, 4685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// "(a?){1,}" 4686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// The zero-length match will repeat forever. 4687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// (That this goes into a loop is another bug) 4688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 4689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------- 4690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid RegexTest::Bug6149() { 4691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString pattern("(a?){1,}"); 4692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString s("xyz"); 4693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t flags = 0; 4694b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4695b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4696b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru RegexMatcher matcher(pattern, s, flags, status); 4697b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool result = false; 4698b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW); 4699b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru REGEX_ASSERT(result == FALSE); 
4700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 4701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Callbacks() Test the callback function. 4705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// When set, callbacks occur periodically during matching operations, 4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// giving the application code the ability to abort the operation 4707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// before it's normal completion. 4708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct callBackContext { 4711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexTest *test; 4712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t maxCalls; 4713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t numCalls; 4714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t lastSteps; 4715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;}; 4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 4717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_BEGIN 4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic UBool U_CALLCONV 4720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QuerutestCallBackFn(const void *context, int32_t steps) { 4721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru callBackContext *info = (callBackContext *)context; 4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if 
(info->lastSteps+1 != steps) { 4723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->test->errln("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->lastSteps = steps; 4726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->numCalls++; 4727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (info->numCalls < info->maxCalls); 4728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 4729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_END 4730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexTest::Callbacks() { 4732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Getter returns NULLs if no callback has been set 4734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The variables that the getter will fill in. 4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Init to non-null values so that the action of the getter can be seen. 
4737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *returnedContext = &returnedContext; 4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *returnedFn = &testCallBackFn; 4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("x", 0, status); 4742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.getMatchCallback(returnedFn, returnedContext, status); 4744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedFn == NULL); 4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedContext == NULL); 4747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Set and Get work 4751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru callBackContext cbInfo = {this, 0, 0, 0}; 4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *returnedContext; 4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *returnedFn; 4754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 
4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setMatchCallback(testCallBackFn, &cbInfo, status); 4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.getMatchCallback(returnedFn, returnedContext, status); 4760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedFn == testCallBackFn); 4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedContext == &cbInfo); 4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A short-running match shouldn't invoke the callback 4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(1); 4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString s = "xxx"; 4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)); 4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls == 0); 4772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A medium-length match that runs long enough to invoke the 4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // callback, but not so long that the callback aborts it. 
4775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(4); 4777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = "aaaaaaaaaaaaaaaaaaab"; 4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)==FALSE); 4780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls > 0); 4782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A longer running match that the callback function will abort. 4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(4); 4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = "aaaaaaaaaaaaaaaaaaaaaaab"; 4787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)==FALSE); 4789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 4790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls == 4); 4791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 4795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 479727f654740f2a26ad62a5c155af9199af9e69b889claireho// 479827f654740f2a26ad62a5c155af9199af9e69b889claireho// 
FindProgressCallbacks() Test the find "progress" callback function. 479927f654740f2a26ad62a5c155af9199af9e69b889claireho// When set, the find progress callback will be invoked during a find operations 480027f654740f2a26ad62a5c155af9199af9e69b889claireho// after each return from a match attempt, giving the application the opportunity 480127f654740f2a26ad62a5c155af9199af9e69b889claireho// to terminate a long-running find operation before it's normal completion. 480227f654740f2a26ad62a5c155af9199af9e69b889claireho// 480327f654740f2a26ad62a5c155af9199af9e69b889claireho 480427f654740f2a26ad62a5c155af9199af9e69b889clairehostruct progressCallBackContext { 480527f654740f2a26ad62a5c155af9199af9e69b889claireho RegexTest *test; 480627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t lastIndex; 480727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t maxCalls; 480827f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t numCalls; 480927f654740f2a26ad62a5c155af9199af9e69b889claireho void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; 481027f654740f2a26ad62a5c155af9199af9e69b889claireho}; 481127f654740f2a26ad62a5c155af9199af9e69b889claireho 481227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN 481327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV 481427f654740f2a26ad62a5c155af9199af9e69b889clairehotestProgressCallBackFn(const void *context, int64_t matchIndex) { 481527f654740f2a26ad62a5c155af9199af9e69b889claireho progressCallBackContext *info = (progressCallBackContext *)context; 481627f654740f2a26ad62a5c155af9199af9e69b889claireho info->numCalls++; 481727f654740f2a26ad62a5c155af9199af9e69b889claireho info->lastIndex = matchIndex; 481827f654740f2a26ad62a5c155af9199af9e69b889claireho// info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls); 481927f654740f2a26ad62a5c155af9199af9e69b889claireho return (info->numCalls < info->maxCalls); 
482027f654740f2a26ad62a5c155af9199af9e69b889claireho} 482127f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END 482227f654740f2a26ad62a5c155af9199af9e69b889claireho 482327f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::FindProgressCallbacks() { 482427f654740f2a26ad62a5c155af9199af9e69b889claireho { 482527f654740f2a26ad62a5c155af9199af9e69b889claireho // Getter returns NULLs if no callback has been set 482627f654740f2a26ad62a5c155af9199af9e69b889claireho 482727f654740f2a26ad62a5c155af9199af9e69b889claireho // The variables that the getter will fill in. 482827f654740f2a26ad62a5c155af9199af9e69b889claireho // Init to non-null values so that the action of the getter can be seen. 482927f654740f2a26ad62a5c155af9199af9e69b889claireho const void *returnedContext = &returnedContext; 483027f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; 483127f654740f2a26ad62a5c155af9199af9e69b889claireho 483227f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 483327f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher matcher("x", 0, status); 483427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 483527f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.getFindProgressCallback(returnedFn, returnedContext, status); 483627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 483727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedFn == NULL); 483827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedContext == NULL); 483927f654740f2a26ad62a5c155af9199af9e69b889claireho } 484027f654740f2a26ad62a5c155af9199af9e69b889claireho 484127f654740f2a26ad62a5c155af9199af9e69b889claireho { 484227f654740f2a26ad62a5c155af9199af9e69b889claireho // Set and Get work 484327f654740f2a26ad62a5c155af9199af9e69b889claireho progressCallBackContext cbInfo = {this, 0, 0, 0}; 484427f654740f2a26ad62a5c155af9199af9e69b889claireho 
const void *returnedContext; 484527f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *returnedFn; 484627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 484727f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 484827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 484927f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); 485027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 485127f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.getFindProgressCallback(returnedFn, returnedContext, status); 485227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 485327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedFn == testProgressCallBackFn); 485427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedContext == &cbInfo); 485527f654740f2a26ad62a5c155af9199af9e69b889claireho 485627f654740f2a26ad62a5c155af9199af9e69b889claireho // A short-running match should NOT invoke the callback. 
485727f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 485827f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(100); 485927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s = "abxxx"; 486027f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s); 486127f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 486227f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.setTrace(TRUE); 486327f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 486427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)); 486527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 486627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls == 0); 486727f654740f2a26ad62a5c155af9199af9e69b889claireho 486827f654740f2a26ad62a5c155af9199af9e69b889claireho // A medium running match that causes matcher.find() to invoke our callback for each index. 486927f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 487027f654740f2a26ad62a5c155af9199af9e69b889claireho s = "aaaaaaaaaaaaaaaaaaab"; 487127f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string 487227f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s); 487327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 487427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 487527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); 487627f654740f2a26ad62a5c155af9199af9e69b889claireho 487727f654740f2a26ad62a5c155af9199af9e69b889claireho // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. 
487827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 487927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; 488027f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string 488127f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s1); 488227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 488327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 488427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); 488527f654740f2a26ad62a5c155af9199af9e69b889claireho 488627f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 488727f654740f2a26ad62a5c155af9199af9e69b889claireho // Now a match that will succeed, but after an interruption 488827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 488927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; 489027f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string 489127f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s2); 489227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 489327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 489427f654740f2a26ad62a5c155af9199af9e69b889claireho // Now retry the match from where left off 489527f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.maxCalls = 100; // No callback limit 489627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); 489727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 489827f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 489927f654740f2a26ad62a5c155af9199af9e69b889claireho } 490027f654740f2a26ad62a5c155af9199af9e69b889claireho 
490127f654740f2a26ad62a5c155af9199af9e69b889claireho 490227f654740f2a26ad62a5c155af9199af9e69b889claireho} 490327f654740f2a26ad62a5c155af9199af9e69b889claireho 490427f654740f2a26ad62a5c155af9199af9e69b889claireho 490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 490750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PreAllocatedUTextCAPI Check the C API with pre-allocated mutable 490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// UTexts. The pure-C implementation of UText 490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// has no mutable backing stores, but we can 491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// use UnicodeString here to test the functionality. 491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 491350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PreAllocatedUTextCAPI () { 491450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho URegularExpression *re; 491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString buffer; 491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText bufferText = UTEXT_INITIALIZER; 491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&bufferText, &buffer, &status); 492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * getText() and getUText() 492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 
492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText text1 = UTEXT_INITIALIZER; 492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText text2 = UTEXT_INITIALIZER; 492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2Chars[20]; 492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *resultText; 493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 493227f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); 493327f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); 493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); 493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&text2, text2Chars, -1, &status); 493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 493727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); 493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openUText(&patternText, 0, NULL, &status); 493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 494050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* First set a UText */ 494150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setUText(re, &text1, &status); 494250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 494350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text1, 0); 4947103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius REGEX_ASSERT(testUTextEqual(resultText, &text1)); 
494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 495250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 495350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text1, 0); 4954103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius REGEX_ASSERT(testUTextEqual(resultText, &text1)); 495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Then set a UChar * */ 495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2Chars, 7, &status); 495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 496050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 496150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text2, 0); 4963103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius REGEX_ASSERT(testUTextEqual(resultText, &text2)); 496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&text1); 496750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&text2); 496850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * group() 497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 
497350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *actual; 497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool result; 497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 498050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("abc(.*?)def", 0, NULL, &status); 498150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 498250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 498350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_find(re, 0, &status); 498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result==TRUE); 498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 498750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture Group 0, the full match. Should succeed. */ 498850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 498927f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 0, &bufferText, &status); 499050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 499227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual); 499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 499450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture group #1. Should succeed. 
*/ 499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 499627f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 1, &bufferText, &status); 499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 499927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual); 500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture group out of range. Error. */ 500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 500327f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); 500450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 500550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 501050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replaceFirst() 501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 501450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2[80]; 501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 502150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 502327f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 502550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("x(.*?)x", 0, NULL, &status); 502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 502850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Normal case, with match */ 502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 503050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 503150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 503250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 503427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); 503550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 503650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* No match. Text should copy to output with no changes. 
*/ 503750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2, -1, &status); 503850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 503950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 504050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 504227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Unicode escapes */ 504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 504627f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); 504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 505127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); 505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 
505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replaceAll() 506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 506150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2[80]; 506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 507027f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 507250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("x(.*?)x", 0, NULL, &status); 507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Normal case, with match */ 507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 507750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceAllUText(re, &replText, &bufferText, &status); 507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 508050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 508127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result); 508250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
508350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* No match. Text should copy to output with no changes. */ 508450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2, -1, &status); 508550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 508650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceAllUText(re, &replText, &bufferText, &status); 508750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 508850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 508927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 509050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 509250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 509450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 509550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 509650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 509750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts, 509850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * so we don't need to test it here. 
509950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 510050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 510150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&bufferText); 510250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 510350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 510450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 510550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------- 510650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 510750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Bug7651 Regex pattern that exceeds default operator stack depth in matcher. 510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------- 511050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Bug7651() { 511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)"); 511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData. 511327f654740f2a26ad62a5c155af9199af9e69b889claireho // It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation. 
511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)"); 511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString s("#ff @abcd This is test"); 511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *REPattern = NULL; 511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *REMatcher = NULL; 511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(pattern1, 0, pe, status); 512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REMatcher = REPattern->matcher(s, status); 512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->find()); 512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->start(status) == 0); 512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(pattern2, 0, pe, status); 513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 513350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REMatcher = REPattern->matcher(s, status); 513450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
513550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->find()); 513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->start(status) == 0); 513750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 514150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 514227f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::Bug7740() { 514327f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 514427f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString pattern = "(a)"; 514527f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString text = "abcdef"; 514627f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher *m = new RegexMatcher(pattern, text, 0, status); 514727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 514827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(m->lookingAt(status)); 514927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 515027f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ILLEGAL_ARGUMENT_ERROR; 515127f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s = m->group(1, status); // Bug 7740: segfault here. 515227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 515327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(s == ""); 515427f654740f2a26ad62a5c155af9199af9e69b889claireho delete m; 515527f654740f2a26ad62a5c155af9199af9e69b889claireho} 515627f654740f2a26ad62a5c155af9199af9e69b889claireho 5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 8479: was crashing whith a Bogus UnicodeString as input. 
5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug8479() { 5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode status = U_ZERO_ERROR; 516127f654740f2a26ad62a5c155af9199af9e69b889claireho 5162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status); 5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) 5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 5166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString str; 5167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho str.setToBogus(); 5168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pMatcher->reset(str); 5169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; 5170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pMatcher->matches(status); 5171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 5172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pMatcher; 5173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 517527f654740f2a26ad62a5c155af9199af9e69b889claireho 517627f654740f2a26ad62a5c155af9199af9e69b889claireho 5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 7029 5178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug7029() { 5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode status = U_ZERO_ERROR; 5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); 5182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString text = "abc.def"; 5183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString splits[10]; 
5184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t numFields = pMatcher->split(text, splits, 10, status); 5186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(numFields == 8); 5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pMatcher; 5189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 5190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5191103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Bug 9283 5192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// This test is checking for the existance of any supplemental characters that case-fold 5193103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// to a bmp character. 5194103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// 5195103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// At the time of this writing there are none. If any should appear in a subsequent release 5196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// of Unicode, the code in regular expressions compilation that determines the longest 5197103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// posssible match for a literal string will need to be enhanced. 5198103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// 5199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() 5200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// for details on what to do in case of a failure of this test. 
5201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// 5202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid RegexTest::Bug9283() { 5203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UErrorCode status = U_ZERO_ERROR; 5204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status); 5205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius REGEX_CHECK_STATUS; 5206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t index; 5207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 c; 5208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (index=0; ; index++) { 5209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c = supplementalsWithCaseFolding.charAt(index); 5210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (c == -1) { 5211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeString cf = UnicodeString(c).foldCase(); 5214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius REGEX_ASSERT(cf.length() >= 2); 5215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 5217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::CheckInvBufSize() { 5220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(inv_next>=INV_BUFSIZ) { 5221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n", 5222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho __FILE__, INV_BUFSIZ, inv_next); 5223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho logln("%s: INV_BUFSIZ is %d, 
usage %d\n", __FILE__, INV_BUFSIZ, inv_next); 5225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 5227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 5229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5230