regextst.cpp revision b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (c) 2002-2011, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// regextst.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// ICU Regular Expressions test, part of intltest. 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho NOTE!! 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho PLEASE be careful about ASCII assumptions in this test. 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho This test is one of the worst repeat offenders. 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho If you have questions, contact someone on the ICU PMC 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho who has access to an EBCDIC system. 
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "intltest.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regextst.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 3627f654740f2a26ad62a5c155af9199af9e69b889claireho#include "cstring.h" 3727f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uinvchar.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define SUPPORT_MUTATING_INPUT_STRING 0 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test class boilerplate 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru//--------------------------------------------------------------------------- 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::RegexTest() 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::~RegexTest() 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) logln("TestSuite RegexTest: "); 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: name = "Basic"; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Basic(); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: name = "API_Match"; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Match(); 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: name = "API_Replace"; 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Replace(); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
break; 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: name = "API_Pattern"; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) API_Pattern(); 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 4: 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho name = "Extended"; 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Extended(); 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho name = "skip"; 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: name = "Errors"; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) Errors(); 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: name = "PerlTests"; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) PerlTests(); 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 7: name = "Callbacks"; 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) Callbacks(); 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 9127f654740f2a26ad62a5c155af9199af9e69b889claireho case 8: name = "FindProgressCallbacks"; 9227f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) FindProgressCallbacks(); 9327f654740f2a26ad62a5c155af9199af9e69b889claireho break; 9427f654740f2a26ad62a5c155af9199af9e69b889claireho case 9: name = "Bug 6149"; 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) Bug6149(); 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 
9727f654740f2a26ad62a5c155af9199af9e69b889claireho case 10: name = "UTextBasic"; 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) UTextBasic(); 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10027f654740f2a26ad62a5c155af9199af9e69b889claireho case 11: name = "API_Match_UTF8"; 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Match_UTF8(); 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10327f654740f2a26ad62a5c155af9199af9e69b889claireho case 12: name = "API_Replace_UTF8"; 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Replace_UTF8(); 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10627f654740f2a26ad62a5c155af9199af9e69b889claireho case 13: name = "API_Pattern_UTF8"; 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) API_Pattern_UTF8(); 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 10927f654740f2a26ad62a5c155af9199af9e69b889claireho case 14: name = "PerlTestsUTF8"; 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) PerlTestsUTF8(); 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11227f654740f2a26ad62a5c155af9199af9e69b889claireho case 15: name = "PreAllocatedUTextCAPI"; 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) PreAllocatedUTextCAPI(); 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11527f654740f2a26ad62a5c155af9199af9e69b889claireho case 16: name = "Bug 7651"; 11627f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) Bug7651(); 11727f654740f2a26ad62a5c155af9199af9e69b889claireho break; 11827f654740f2a26ad62a5c155af9199af9e69b889claireho case 17: name = "Bug 7740"; 11927f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) Bug7740(); 12027f654740f2a26ad62a5c155af9199af9e69b889claireho break; 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 18: name = "Bug 8479"; 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) Bug8479(); 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 19: name = "Bug 7029"; 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) Bug7029(); 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 20: name = "CheckInvBufSize"; 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (exec) CheckInvBufSize(); 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; //needed to end loop 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 13827f654740f2a26ad62a5c155af9199af9e69b889claireho/** 13927f654740f2a26ad62a5c155af9199af9e69b889claireho * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage 14027f654740f2a26ad62a5c155af9199af9e69b889claireho * into ASCII. 14127f654740f2a26ad62a5c155af9199af9e69b889claireho * @see utext_openUTF8 14227f654740f2a26ad62a5c155af9199af9e69b889claireho */ 14327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); 14427f654740f2a26ad62a5c155af9199af9e69b889claireho 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Error Checking / Reporting macros used in all of the tests. 
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) { 15227f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t oldIndex = utext_getNativeIndex(text); 15327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(text, 0); 15427f654740f2a26ad62a5c155af9199af9e69b889claireho char *bufPtr = buf; 15527f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c = utext_next32From(text, 0); 15627f654740f2a26ad62a5c155af9199af9e69b889claireho while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) { 15727f654740f2a26ad62a5c155af9199af9e69b889claireho if (0x000020<=c && c<0x00007e) { 15827f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = c; 15927f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 16027f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 16127f654740f2a26ad62a5c155af9199af9e69b889claireho sprintf(bufPtr,"U+%04X", c); 16227f654740f2a26ad62a5c155af9199af9e69b889claireho bufPtr+= strlen(bufPtr)-1; 16327f654740f2a26ad62a5c155af9199af9e69b889claireho#else 16427f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = '%'; 16527f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 16627f654740f2a26ad62a5c155af9199af9e69b889claireho } 16727f654740f2a26ad62a5c155af9199af9e69b889claireho bufPtr++; 16827f654740f2a26ad62a5c155af9199af9e69b889claireho c = UTEXT_NEXT32(text); 16927f654740f2a26ad62a5c155af9199af9e69b889claireho } 17027f654740f2a26ad62a5c155af9199af9e69b889claireho *bufPtr = 0; 17127f654740f2a26ad62a5c155af9199af9e69b889claireho#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) 17227f654740f2a26ad62a5c155af9199af9e69b889claireho char *ebuf = (char*)malloc(bufLen); 17327f654740f2a26ad62a5c155af9199af9e69b889claireho 
uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen); 17427f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_strncpy(buf, ebuf, bufLen); 17527f654740f2a26ad62a5c155af9199af9e69b889claireho free((void*)ebuf); 17627f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 17727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(text, oldIndex); 17827f654740f2a26ad62a5c155af9199af9e69b889claireho} 17927f654740f2a26ad62a5c155af9199af9e69b889claireho 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic inline UChar toHex(int32_t i) { 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UnicodeString& escape(const UnicodeString& s, UnicodeString& result) { 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (int32_t i=0; i<s.length(); ++i) { 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar c = s[i]; 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ((c <= (UChar)0x7F) && (c>0)) { 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += c; 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += (UChar)0x5c; 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += (UChar)0x75; 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += toHex((c >> 12) & 0xF); 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += toHex((c >> 8) & 0xF); 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += toHex((c >> 4) & 0xF); 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result += toHex( c & 0xF); 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char ASSERT_BUF[1024]; 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic const char* extractToAssertBuf(const UnicodeString& message) { 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(message.length()==0) { 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strcpy(ASSERT_BUF, "[[empty UnicodeString]]"); 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString buf; 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho escape(message, buf); 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(buf.length()==0) { 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]"); 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1); 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ASSERT_BUF[0]==0) { 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ASSERT_BUF[0]=0; 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(int32_t i=0;i<buf.length();i++) { 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar ch = buf[i]; 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return ASSERT_BUF; 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 
22727f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);} 22827f654740f2a26ad62a5c155af9199af9e69b889claireho 22927f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \ 23027f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, u_errorName(status)); return;}} 23127f654740f2a26ad62a5c155af9199af9e69b889claireho 23227f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\ 2356d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queruif (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=%s, got %s", \ 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, u_errorName(errcode), u_errorName(status));};} 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \ 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "RegexTest failure at line %d, from %d. 
status=%d\n",__LINE__, (line), status); }} 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \ 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}} 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};} 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 24627f654740f2a26ad62a5c155af9199af9e69b889claireho/** 24727f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected expected text in UTF-8 (not platform) codepage 24827f654740f2a26ad62a5c155af9199af9e69b889claireho */ 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) { 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText expectedText = UTEXT_INITIALIZER; 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&expectedText, expected, -1, &status); 25327f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(status)) { 25427f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 25527f654740f2a26ad62a5c155af9199af9e69b889claireho return; 25627f654740f2a26ad62a5c155af9199af9e69b889claireho } 25727f654740f2a26ad62a5c155af9199af9e69b889claireho if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) { 25827f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: expected is %d utf-8 bytes, but 
utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected)); 25927f654740f2a26ad62a5c155af9199af9e69b889claireho return; 26027f654740f2a26ad62a5c155af9199af9e69b889claireho } 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(actual, 0); 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (utext_compare(&expectedText, -1, actual, -1) != 0) { 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char buf[201 /*21*/]; 26427f654740f2a26ad62a5c155af9199af9e69b889claireho char expectedBuf[201]; 26527f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 26627f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 26727f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&expectedText); 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 27127f654740f2a26ad62a5c155af9199af9e69b889claireho/** 27227f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected invariant (platform local text) input 27327f654740f2a26ad62a5c155af9199af9e69b889claireho */ 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) { 27627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 27727f654740f2a26ad62a5c155af9199af9e69b889claireho UText expectedText = UTEXT_INITIALIZER; 27827f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status); 27927f654740f2a26ad62a5c155af9199af9e69b889claireho 
if(U_FAILURE(status)) { 28027f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected)); 28127f654740f2a26ad62a5c155af9199af9e69b889claireho return; 28227f654740f2a26ad62a5c155af9199af9e69b889claireho } 28327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_setNativeIndex(actual, 0); 28427f654740f2a26ad62a5c155af9199af9e69b889claireho if (utext_compare(&expectedText, -1, actual, -1) != 0) { 28527f654740f2a26ad62a5c155af9199af9e69b889claireho char buf[201 /*21*/]; 28627f654740f2a26ad62a5c155af9199af9e69b889claireho char expectedBuf[201]; 28727f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 28827f654740f2a26ad62a5c155af9199af9e69b889claireho utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText); 28927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual)); 29027f654740f2a26ad62a5c155af9199af9e69b889claireho } 29127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_close(&expectedText); 29227f654740f2a26ad62a5c155af9199af9e69b889claireho} 29327f654740f2a26ad62a5c155af9199af9e69b889claireho 29427f654740f2a26ad62a5c155af9199af9e69b889claireho/** 29527f654740f2a26ad62a5c155af9199af9e69b889claireho * Assumes utf-8 input 29627f654740f2a26ad62a5c155af9199af9e69b889claireho */ 29727f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__) 29827f654740f2a26ad62a5c155af9199af9e69b889claireho/** 29927f654740f2a26ad62a5c155af9199af9e69b889claireho * Assumes Invariant input 30027f654740f2a26ad62a5c155af9199af9e69b889claireho */ 
30127f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__) 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This buffer ( inv_buf ) is used to hold the UTF-8 strings 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * passed into utext_openUTF8. An error will be given if 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * INV_BUFSIZ is too small. It's only used on EBCDIC systems. 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define INV_BUFSIZ 2048 /* increase this if too small */ 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t inv_next=0; 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY!=U_ASCII_FAMILY 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char inv_buf[INV_BUFSIZ]; 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) { 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length==-1) length=strlen(inv); 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY==U_ASCII_FAMILY 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inv_next+=length; 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return utext_openUTF8(ut, inv, length, status); 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#else 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(inv_next+length+1>INV_BUFSIZ) { 
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n", 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1)); 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *status = U_MEMORY_ALLOCATION_ERROR; 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho unsigned char *buf = (unsigned char*)inv_buf+inv_next; 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_aestrncpy(buf, (const uint8_t*)inv, length); 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inv_next+=length; 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if 0 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next); 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return utext_openUTF8(ut, (const char*)buf, length, status); 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_TESTLM Macro + invocation function to simplify writing quick tests 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for the LookingAt() and Match() functions. 
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// usage: 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_TESTLM("pattern", "input text", lookingAt expected, matches expected); 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The expected results are UBool - TRUE or FALSE. 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The input text is unescaped. The pattern is not. 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define REGEX_TESTLM(pat, text, looking, match) {doRegexLMTest(pat, text, looking, match, __LINE__);doRegexLMTestUTF8(pat, text, looking, match, __LINE__);} 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString pattern(pat, -1, US_INV); 361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString inputText(text, -1, US_INV); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *REPattern = NULL; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *REMatcher = NULL; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
UBool retVal = TRUE; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString patString(pat, -1, US_INV); 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REPattern = RegexPattern::compile(patString, 0, pe, status); 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 3716d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Status = %s", 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (line==376) { RegexPatternDump(REPattern);} 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString inputString(inputText); 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString unEscapedInput = inputString.unescape(); 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REMatcher = REPattern->matcher(unEscapedInput, status); 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in REPattern::matcher() at line %d. 
Status = %s\n", 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool actualmatch; 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualmatch = REMatcher->lookingAt(status); 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in lookingAt() at line %d. Status = %s\n", 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualmatch != looking) { 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest: wrong return from lookingAt() at line %d.\n", line); 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualmatch = REMatcher->matches(status); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure in matches() at line %d. 
Status = %s\n", 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, u_errorName(status)); 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualmatch != match) { 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest: wrong return from matches() at line %d.\n", line); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = FALSE; 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retVal == FALSE) { 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPatternDump(REPattern); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete REPattern; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete REMatcher; 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retVal; 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputUTF8Length; 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *textChars = NULL; 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *REPattern = NULL; 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *REMatcher = NULL; 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool retVal = TRUE; 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43127f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(&pattern, 0, pe, status); 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8). Status = %s\n", 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString inputString(text, -1, US_INV); 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unEscapedInput = inputString.unescape(); 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status); 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen 
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("RegexTest unable to convert input to UTF8 at line %d. Status = %s\n", line, u_errorName(status)); 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; // not a failure of the Regex engine 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho textChars = new char[inputUTF8Length+1]; 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status); 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REMatcher = &REPattern->matcher(status)->reset(&inputText); 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Status = %s\n", 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool actualmatch; 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho actualmatch = REMatcher->lookingAt(status); 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in lookingAt() at line %d (UTF8). 
Status = %s\n", 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualmatch != looking) { 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line); 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho actualmatch = REMatcher->matches(status); 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n", 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, u_errorName(status)); 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualmatch != match) { 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line); 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = FALSE; 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (retVal == FALSE) { 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPatternDump(REPattern); 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] textChars; 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_ERR Macro + invocation function to simplify writing tests 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// regex tests for incorrect patterns 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// usage: 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// REGEX_ERR("pattern", expected error line, column, expected status); 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol, 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode expectedStatus, int32_t line) { 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern(pat); 
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *callerPattern = NULL; 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile the caller's pattern 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString patString(pat); 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru callerPattern = RegexPattern::compile(patString, 0, pe, status); 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != expectedStatus) { 5256d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status != U_ZERO_ERROR) { 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pe.line != errLine || pe.offset != errCol) { 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Line %d: incorrect line/offset from UParseError. 
Expected %d/%d; got %d/%d.\n", 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru line, errLine, errCol, pe.line, pe.offset); 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete callerPattern; 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile again, using a UTF-8-based UText 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 54127f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&patternText, pat, -1, &status); 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callerPattern = RegexPattern::compile(&patternText, 0, pe, status); 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != expectedStatus) { 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status)); 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pe.line != errLine || pe.offset != errCol) { 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: incorrect line/offset from UParseError. 
Expected %d/%d; got %d/%d.\n", 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, errLine, errCol, pe.line, pe.offset); 55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete callerPattern; 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Basic Check for basic functionality of regex pattern matching. 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Avoid the use of REGEX_FIND test macro, which has 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// substantial dependencies on basic Regex functionality. 
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::Basic() { 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debug - slide failing test cases early 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern::compile("^(?:a?b?)*$", 0, pe, status); 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // REGEX_FIND("(?>(abc{2,4}?))(c*)", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(1); 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pattern with parentheses 
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE, FALSE); 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)ring", "stabcring", TRUE, TRUE); 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)ring", "stabcrung", FALSE, FALSE); 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Patterns with * 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE); 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE); 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE); 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a*", "", TRUE, TRUE); 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a*", "b", TRUE, FALSE); 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Patterns with "." 
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".", "abc", TRUE, FALSE); 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("...", "abc", TRUE, TRUE); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("....", "abc", FALSE, FALSE); 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE); 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE); 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE); 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE); 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE); 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Patterns with * applied to chars at end of literal string 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("abc*", "ab", TRUE, TRUE); 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE); 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Supplemental chars match as single chars, not a pair of surrogates. 
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE); 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE); 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE); 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeSets in the pattern 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[1-6]", "1", TRUE, TRUE); 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[1-6]", "3", TRUE, TRUE); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[1-6]", "7", FALSE, FALSE); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE); 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE); 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE); 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE); 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE); 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE); 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE); // note that * matches 0 occurences. 
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("[a][b][[:Zs:]]*", "ab ", TRUE, TRUE); 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // OR operator in patterns 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|b)", "a", TRUE, TRUE); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|b)", "b", TRUE, TRUE); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|b)", "c", FALSE, FALSE); 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a|b", "b", TRUE, TRUE); 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE); 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE); 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE); 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE); 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE); 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // + 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab+", "abbc", TRUE, FALSE); 
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab+c", "ac", FALSE, FALSE); 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("b+", "", FALSE, FALSE); 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE); 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE); 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE); 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ? 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab?", "ab", TRUE, TRUE); 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab?", "a", TRUE, TRUE); 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab?", "ac", TRUE, FALSE); 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("ab?", "abb", TRUE, FALSE); 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE); 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE); 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE); 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE); 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Escape sequences that become single 
literal chars, handled internally 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by ICU's Unescape. 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE); // Octal TODO: not implemented yet. 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE); // BEL 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE); // Control-L 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE); // Escape 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE); // Form Feed 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE); // new line 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE); // CR 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE); // Tab 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE); 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE); // \A matches only at the beginning of input 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE); // \A matches only at the beginning of input 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Escape of special chars in patterns 
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE); 70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// UTextBasic Check for quirks that are specific to the UText 71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// implementation. 71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 71650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::UTextBasic() { 71727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 72027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_abc, -1, &status); 72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher matcher(&pattern, 0, status); 72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 72527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abc, -1, &status); 72650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher.reset(&input); 72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
72927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher.reset(matcher.inputText()); 73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 73327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// API_Match Test that the API for class RegexMatcher 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// is present and nominally working, but excluding functions 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// implementing replace operations. 
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Match() { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags = 0; 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Debug - slide failing test cases early 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Simple pattern compilation 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat2; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat2 = RegexPattern::compile(re, flags, pe, status); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString inStr1 = "abcdef this is a test"; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString instr2 = "not abc"; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString empty = ""; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matcher creation and reset. 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m1 = pat2->matcher(inStr1, status); 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == FALSE); 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == instr2); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(inStr1); 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(empty); 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == FALSE); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == empty); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
REGEX_ASSERT(&m1->pattern() == pat2); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reset(pos, status) 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(inStr1); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(4, status); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->input() == inStr1); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(status) == TRUE); 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(-1, status); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(0, status); 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = m1->input().length(); 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(len-1, status); 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
m1->reset(len, status); 81627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 81727f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 81827f654740f2a26ad62a5c155af9199af9e69b889claireho 81927f654740f2a26ad62a5c155af9199af9e69b889claireho m1->reset(len+1, status); 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match(pos, status) 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(4, status) == TRUE); 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(); 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(3, status) == FALSE); 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(); 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(5, status) == FALSE); 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(4, status) == TRUE); 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(-1, status) == FALSE); 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match() at end of string should fail, but should not 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be an error. 
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = m1->input().length(); 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(len, status) == FALSE); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match beyond end of string should fail with an error. 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->matches(len+1, status) == FALSE); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Successful match at end of string. 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("A?", 0, status); // will match zero length string. 
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(inStr1); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = inStr1.length(); 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.matches(len, status) == TRUE); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(empty); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.matches(0, status) == TRUE); 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // lookingAt(pos, status) 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m1->reset(instr2); // "not abc" 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = m1->input().length(); 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(len, status) == FALSE); 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE); 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete m1; 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat2; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Capture Group. 
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::start(); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::end(); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexMatcher::groupCount(); 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("01(23(45)67)(.*)"); 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = "0123456789"; 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t matchStarts[] = {0, 2, 4, 8}; 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t matchEnds[] = {10, 8, 6, 10}; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<4; i++) { 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualStart = matcher->start(i, 
status); 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualStart != matchStarts[i]) { 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d, index %d. Expected %d, got %d\n", 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, i, matchStarts[i], actualStart); 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualEnd = matcher->end(i, status); 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualEnd != matchEnds[i]) { 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("RegexTest failure at line %d index %d. Expected %d, got %d\n", 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __LINE__, i, matchEnds[i], actualEnd); 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start( 0, status), 
U_REGEX_INVALID_STATE); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->lookingAt(status); 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(status) == "0123456789"); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(0, status) == "0123456789"); 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(1, status) == "234567" ); 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(2, status) == "45" ); 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->group(3, status) == "89" ); 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru { 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abc..abc...abc.."; 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 6); 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 12); 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find() == FALSE); 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find() == FALSE); 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(); 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(0, status)); 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(1, status)); 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 1); 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(2, status)); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 6); 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(12, status)); 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 12); 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(13, status) == FALSE); 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(16, status) == FALSE); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find(17, status) == FALSE); 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
status = U_ZERO_ERROR; 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->groupCount() == 0); 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find, with \G in pattern (true if at the end of a previous match). 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV); 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abcabc.abc.."; 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 0); 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(1, status) == -1); 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(2, status) == 1); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->find()); 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(status) == 4); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(1, status) == 4); 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(matcher->start(2, status) == -1); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find with zero length matches, match position should bump ahead 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to prevent loops. 
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // using an always-true look-ahead. 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s(" "); 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; ; i++) { 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == i); 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==5); 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the bump goes over surrogate pairs OK 1055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004"); 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = s.unescape(); 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
for (i=0; ; i+=2) { 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == i); 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==10); 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find() loop breaking test. 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // with pattern of /.?/, should see a series of one char matches, then a single 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match of zero length at the end of the input string. 
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".?", 0, status); 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s(" "); 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; ; i++) { 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find() == FALSE) { 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == i); 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(i==5); 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matchers with no input string behave as if they had an empty input string. 
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".?", 0, status); 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.find()); 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.start(status) == 0); 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m.input() == ""); 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *p = RegexPattern::compile(".", 0, status); 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m = p->matcher(status); 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->find() == FALSE); 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(m->input() == ""); 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete m; 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete p; 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Regions 
1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("This is test data"); 1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m(".*", testString, 0, status); 1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == testString.length()); 1122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.region(2,4, status); 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.matches(status)); 1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.start(status)==2); 1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.end(status)==4); 1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(); 1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == testString.length()); 1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru 1136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString shorterString("short"); 1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(shorterString); 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionStart() == 0); 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.regionEnd() == shorterString.length()); 1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 1148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m 
== &m.reset()); 1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(&m == &m.reset()); 1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // hitEnd() and requireEnd() 1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aabb"); 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m1(".*", testString, 0, status); 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.lookingAt(status) == TRUE); 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.hitEnd() == TRUE); 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m1.requireEnd() == FALSE); 1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 
1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m2("a*", testString, 0, status); 1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.lookingAt(status) == TRUE); 1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.hitEnd() == FALSE); 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m2.requireEnd() == FALSE); 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m3(".*$", testString, 0, status); 1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.lookingAt(status) == TRUE); 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.hitEnd() == TRUE); 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(m3.requireEnd() == TRUE); 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compilation error on reset with UChar * 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // These were a hazard that people were stumbling over with runtime errors. 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Changed them to compiler errors by adding private methods that more closely 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // matched the incorrect use of the functions. 
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar ucharString[20]; 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(".", 0, status); 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(ucharString); // should not compile. 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *p = RegexPattern::compile(".", 0, status); 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m2 = p->matcher(ucharString, status); // should not compile. 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m3(".", ucharString, 0, status); // Should not compile 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Time Outs. 1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note: These tests will need to be changed when the regexp engine is 1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // able to detect and cut short the exponential time behavior on 1217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this type of match. 
1218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Enough 'a's in the string to cause the match to time out. 1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (Each on additonal 'a' doubles the time) 1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); 1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(a+)+b", testString, 0, status); 1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getTimeLimit() == 0); 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setTimeLimit(100, status); 1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getTimeLimit() == 100); 1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_TIME_OUT); 1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Few enough 'a's to slip in under the time limit. 
1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString("aaaaaaaaaaaaaaaaaa"); 1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(a+)+b", testString, 0, status); 1237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setTimeLimit(100, status); 1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Stack Limits 1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A' 1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations 1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the '+', and makes the stack frames larger. 
1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("(A)+A$", testString, 0, status); 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With the default stack, this match should fail to run 1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With unlimited stack, it should run 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(0, status); 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == TRUE); 1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 0); 1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // With a limited stack, it the match should fail 1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(10000, status); 1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 10000); 
1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A pattern that doesn't save state should work with 1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // a minimal sized stack 1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testString = "abc"; 1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("abc", testString, 0, status); 1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(30, status); 1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status) == TRUE); 1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.getStackLimit() == 30); 1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Negative stack sizes should fail 1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(1000, status); 1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setStackLimit(-1, status); 1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
REGEX_ASSERT(matcher.getStackLimit() == 1000); 1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// API_Replace API test for class RegexMatcher, testing the 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Replace family of functions. 
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Replace() { 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replace 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags=0; 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re("abc"); 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data = ".abc..abc...abc.."; 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 012345678901234567 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = pat->matcher(data, status); 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Plain vanilla matches. 
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest; 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".yz..abc...abc.."); 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".yz..yz...yz.."); 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Plain vanilla non-matches. 
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d2 = ".abx..abx...abx.."; 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d2); 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".abx..abx...abx.."); 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ".abx..abx...abx.."); 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Empty source string 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d3 = ""; 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d3); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("yz", status); 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ""); 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("yz", status); 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == ""); 
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Empty substitution string 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(data); // ".abc..abc...abc.." 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("", status); 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "...abc...abc.."); 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("", status); 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "........"); 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match whole string 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d4 = "abc"; 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(d4); 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceFirst("xyz", status); 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "xyz"); 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher->replaceAll("xyz", status); 
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "xyz"); 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Capture Group, simple case 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString re2("a(..)"); 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status); 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString d5 = "abcdefg"; 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher2 = pat2->matcher(d5, status); 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst("$1$1", status); 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "bcbcdefg"); 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status); 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "The value of $1 is bc.defg"); 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst("$ by itself, no group number $$$", status); 
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "$ by itself, no group number $$$defg"); 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF."); 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacement = replacement.unescape(); 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest = matcher2->replaceFirst(replacement, status); 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg"); 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR); 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replacement String with \u hex escapes 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString src = "abc 1 abc 2 abc 3"; 1423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--"); 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(src); 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result = matcher->replaceAll(substitute, status); 
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3"); 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString src = "abc !"; 1431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--"); 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher->reset(src); 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result = matcher->replaceAll(substitute, status); 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString expected = UnicodeString("--"); 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected.append((UChar32)0x10000); 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected.append("-- !"); 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == expected); 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: need more through testing of capture substitutions. 
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Bug 4057 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin"; 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m("ss(.*?)ee", 0, status); 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Multiple finds do NOT bump up the previous appendReplacement postion. 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(s); 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // After a reset into the interior of a string, appendReplacemnt still starts at beginning. 
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(10, status); 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find() at interior of string, appendReplacemnt still starts at beginning. 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(); 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(10, status); 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.find(); 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendReplacement(result, "ooh", status); 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_CHECK_STATUS; 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh"); 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.appendTail(result); 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin"); 
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher2; 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat2; 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
//---------------------------------------------------------------------------
//
//      API_Pattern   Test that the API for class RegexPattern is
//                    present and nominally working.
//
//      Covers, in order: default construction, equality / inequality,
//      assignment, copy construction, compile() with and without flags,
//      clone(), a matcher created from a cloned pattern, the static
//      matches() convenience function, split(), pattern(), and the
//      class ID functions.
//
//---------------------------------------------------------------------------
void RegexTest::API_Pattern() {
    RegexPattern        pata;    // Test default constructor to not crash.
    RegexPattern        patb;

    // Two default-constructed patterns are expected to compare equal,
    // and a pattern is expected to compare equal to itself.
    REGEX_ASSERT(pata == patb);
    REGEX_ASSERT(pata == pata);

    UnicodeString re1("abc[a-l][m-z]");
    UnicodeString re2("def");
    UErrorCode    status = U_ZERO_ERROR;
    UParseError   pe;

    RegexPattern        *pat1 = RegexPattern::compile(re1, 0, pe, status);
    RegexPattern        *pat2 = RegexPattern::compile(re2, 0, pe, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(*pat1 == *pat1);
    REGEX_ASSERT(*pat1 != pata);

    // Assign
    patb = *pat1;
    REGEX_ASSERT(patb == *pat1);

    // Copy Construct
    RegexPattern patc(*pat1);
    REGEX_ASSERT(patc == *pat1);
    REGEX_ASSERT(patb == patc);
    // Compare the pattern objects themselves.  Comparing the two pointers
    // would always succeed (they come from separate compile() calls) and
    // would not exercise RegexPattern::operator!= at all.
    REGEX_ASSERT(*pat1 != *pat2);
    patb = *pat2;
    REGEX_ASSERT(patb != patc);
    REGEX_ASSERT(patb == *pat2);

    // Compile with no flags.
    RegexPattern         *pat1a = RegexPattern::compile(re1, pe, status);
    // Check the status before dereferencing the result, as is done for the
    // other compile() calls in this function.
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(*pat1a == *pat1);

    REGEX_ASSERT(pat1a->flags() == 0);

    // Compile with different flags should be not equal
    RegexPattern        *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);
    REGEX_CHECK_STATUS;

    REGEX_ASSERT(*pat1b != *pat1a);
    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
    REGEX_ASSERT(pat1a->flags() == 0);
    delete pat1b;

    // clone
    RegexPattern *pat1c = pat1->clone();
    REGEX_ASSERT(*pat1c == *pat1);
    REGEX_ASSERT(*pat1c != *pat2);

    delete pat1c;
    delete pat1a;
    delete pat1;
    delete pat2;


    //
    //   Verify that a matcher created from a cloned pattern works.
    //     (Jitterbug 3423)
    //
    {
        // Note: this local status intentionally shadows the function-level
        //       one for the duration of this block.
        UErrorCode     status     = U_ZERO_ERROR;
        RegexPattern  *pSource    = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
        // Check the status before using pSource, as is done for the other
        // compile() calls in this function.
        REGEX_CHECK_STATUS;
        RegexPattern  *pClone     = pSource->clone();
        // The source pattern is deleted before the clone is used; the clone
        // must keep working on its own.
        delete         pSource;
        RegexMatcher  *mFromClone = pClone->matcher(status);
        REGEX_CHECK_STATUS;
        UnicodeString s = "Hello World";
        mFromClone->reset(s);
        REGEX_ASSERT(mFromClone->find() == TRUE);
        REGEX_ASSERT(mFromClone->group(status) == "Hello");
        REGEX_ASSERT(mFromClone->find() == TRUE);
        REGEX_ASSERT(mFromClone->group(status) == "World");
        REGEX_ASSERT(mFromClone->find() == FALSE);
        delete mFromClone;
        delete pClone;
    }

    //
    //   matches convenience API
    //
    REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
    REGEX_CHECK_STATUS;
    // Enter with a failure already in status:  matches() is expected to
    // return FALSE and to leave the incoming status value unchanged.
    status = U_INDEX_OUTOFBOUNDS_ERROR;
    REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);


    //
    // Split()
    //
    status = U_ZERO_ERROR;
    pat1 = RegexPattern::compile(" +", pe, status);
    REGEX_CHECK_STATUS;
    UnicodeString  fields[10];

    int32_t n;
    n = pat1->split("Now is the time", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==4);
    REGEX_ASSERT(fields[0]=="Now");
    REGEX_ASSERT(fields[1]=="is");
    REGEX_ASSERT(fields[2]=="the");
    REGEX_ASSERT(fields[3]=="time");
    REGEX_ASSERT(fields[4]=="");

    // Destination capacity of 2:  the last slot is expected to receive all
    // of the remaining input, and slots past the capacity stay untouched.
    n = pat1->split("Now is the time", fields, 2, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==2);
    REGEX_ASSERT(fields[0]=="Now");
    REGEX_ASSERT(fields[1]=="is the time");
    REGEX_ASSERT(fields[2]=="the");   // left over from previous test

    // Destination capacity of 1:  the whole input lands in the single slot;
    // the sentinel placed in fields[1] must survive.
    fields[1] = "*";
    status = U_ZERO_ERROR;
    n = pat1->split("Now is the time", fields, 1, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==1);
    REGEX_ASSERT(fields[0]=="Now is the time");
    REGEX_ASSERT(fields[1]=="*");
    status = U_ZERO_ERROR;

    // Leading and trailing delimiters produce empty first and last fields.
    n = pat1->split(" Now is the time ", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==6);
    REGEX_ASSERT(fields[0]=="");
    REGEX_ASSERT(fields[1]=="Now");
    REGEX_ASSERT(fields[2]=="is");
    REGEX_ASSERT(fields[3]=="the");
    REGEX_ASSERT(fields[4]=="time");
    REGEX_ASSERT(fields[5]=="");

    // Input consisting only of a delimiter.
    n = pat1->split(" ", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==2);
    REGEX_ASSERT(fields[0]=="");
    REGEX_ASSERT(fields[1]=="");

    // Empty input:  zero fields are reported and fields[0] is not written.
    fields[0] = "foo";
    n = pat1->split("", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==0);
    REGEX_ASSERT(fields[0]=="foo");

    delete pat1;

    //  split, with a pattern with (capture)
    //    The text of each capture group is expected to appear as its own
    //    field, between the fields that surround the delimiter.
    pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"),  pe, status);
    REGEX_CHECK_STATUS;

    status = U_ZERO_ERROR;
    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==7);
    REGEX_ASSERT(fields[0]=="");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="b");
    REGEX_ASSERT(fields[4]=="the time");
    REGEX_ASSERT(fields[5]=="c");
    REGEX_ASSERT(fields[6]=="");
    REGEX_ASSERT(status==U_ZERO_ERROR);

    n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==7);
    REGEX_ASSERT(fields[0]==" ");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="b");
    REGEX_ASSERT(fields[4]=="the time");
    REGEX_ASSERT(fields[5]=="c");
    REGEX_ASSERT(fields[6]=="");

    // Same input with progressively smaller destination capacities.
    // fields[6] / fields[5] hold a sentinel to show that nothing is written
    // past the requested capacity.
    status = U_ZERO_ERROR;
    fields[6] = "foo";
    n = pat1->split(" <a>Now is <b>the time<c>", fields, 6, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==6);
    REGEX_ASSERT(fields[0]==" ");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="b");
    REGEX_ASSERT(fields[4]=="the time");
    REGEX_ASSERT(fields[5]=="");  // All text following "<c>" field delimiter.
    REGEX_ASSERT(fields[6]=="foo");

    status = U_ZERO_ERROR;
    fields[5] = "foo";
    n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==5);
    REGEX_ASSERT(fields[0]==" ");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="b");
    REGEX_ASSERT(fields[4]=="the time<c>");
    REGEX_ASSERT(fields[5]=="foo");

    status = U_ZERO_ERROR;
    fields[5] = "foo";
    n = pat1->split(" <a>Now is <b>the time", fields, 5, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==5);
    REGEX_ASSERT(fields[0]==" ");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="b");
    REGEX_ASSERT(fields[4]=="the time");
    REGEX_ASSERT(fields[5]=="foo");

    status = U_ZERO_ERROR;
    n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==4);
    REGEX_ASSERT(fields[0]==" ");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="Now is ");
    REGEX_ASSERT(fields[3]=="the time<c>");
    status = U_ZERO_ERROR;
    delete pat1;

    // The delimiter itself is the capture group, so the delimiters are
    // expected to show up as fields of their own.
    pat1 = RegexPattern::compile("([-,])", pe, status);
    REGEX_CHECK_STATUS;
    n = pat1->split("1-10,20", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==5);
    REGEX_ASSERT(fields[0]=="1");
    REGEX_ASSERT(fields[1]=="-");
    REGEX_ASSERT(fields[2]=="10");
    REGEX_ASSERT(fields[3]==",");
    REGEX_ASSERT(fields[4]=="20");
    delete pat1;

    // Test split of string with empty trailing fields
    pat1 = RegexPattern::compile(",", pe, status);
    REGEX_CHECK_STATUS;
    n = pat1->split("a,b,c,", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==4);
    REGEX_ASSERT(fields[0]=="a");
    REGEX_ASSERT(fields[1]=="b");
    REGEX_ASSERT(fields[2]=="c");
    REGEX_ASSERT(fields[3]=="");

    n = pat1->split("a,,,", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==4);
    REGEX_ASSERT(fields[0]=="a");
    REGEX_ASSERT(fields[1]=="");
    REGEX_ASSERT(fields[2]=="");
    REGEX_ASSERT(fields[3]=="");
    delete pat1;

    // Split Separator with zero length match.
    pat1 = RegexPattern::compile(":?", pe, status);
    REGEX_CHECK_STATUS;
    n = pat1->split("abc", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==5);
    REGEX_ASSERT(fields[0]=="");
    REGEX_ASSERT(fields[1]=="a");
    REGEX_ASSERT(fields[2]=="b");
    REGEX_ASSERT(fields[3]=="c");
    REGEX_ASSERT(fields[4]=="");

    delete pat1;

    //
    // RegexPattern::pattern()
    //
    pat1 = new RegexPattern();
    REGEX_ASSERT(pat1->pattern() == "");
    delete pat1;

    pat1 = RegexPattern::compile("(Hello, world)*", pe, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(pat1->pattern() == "(Hello, world)*");
    delete pat1;


    //
    // classID functions
    //
    pat1 = RegexPattern::compile("(Hello, world)*", pe, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID());
    REGEX_ASSERT(pat1->getDynamicClassID() != NULL);
    UnicodeString Hello("Hello, world.");
    RegexMatcher *m = pat1->matcher(Hello, status);
    // Check the status before dereferencing the new matcher.
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID());
    REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID());
    REGEX_ASSERT(m->getDynamicClassID() != NULL);
    delete m;
    delete pat1;

}
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste
Queru//--------------------------------------------------------------------------- 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 180650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Match_UTF8 Test that the alternate engine for class RegexMatcher 180750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// is present and working, but excluding functions 180850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// implementing replace operations. 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 181150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Match_UTF8() { 181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = 0; 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Debug - slide failing test cases early 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0 182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 182250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Simple pattern compilation 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re = UTEXT_INITIALIZER; 
183027f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 1831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_VERBOSE_TEXT(&re); 183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2; 183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat2 = RegexPattern::compile(&re, flags, pe, status); 183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 183650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input1 = UTEXT_INITIALIZER; 183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input2 = UTEXT_INITIALIZER; 183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 183927f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status); 184027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&input1); 184127f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); 184227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&input2); 184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&empty, NULL, 0, &status); 184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 184527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */ 184650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t input2Len = strlen("not abc"); 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matcher creation and reset. 
185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 1852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); 185350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 185450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 185527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */ 185627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); 185850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == FALSE); 185927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */ 186027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText()); 186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input1); 186227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&empty); 186550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == FALSE); 186650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_nativeLength(&empty) == 0); 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 186850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 186950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // reset(pos, status) 187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
m1->reset(&input1); 187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(4, status); 187350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 187427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); 187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(status) == TRUE); 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(-1, status); 187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(0, status); 188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(input1Len-1, status); 188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(input1Len, status); 189027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 189127f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 189227f654740f2a26ad62a5c155af9199af9e69b889claireho 189327f654740f2a26ad62a5c155af9199af9e69b889claireho m1->reset(input1Len+1, status); 189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match(pos, status) 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); 190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(4, status) == TRUE); 190250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(); 190350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(3, status) == FALSE); 190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(); 190550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(5, status) == FALSE); 190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(4, status) == TRUE); 190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(-1, status) == FALSE); 190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match() at end of string should fail, but should not 191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // be an error. 191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(input2Len, status) == FALSE); 191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match beyond end of string should fail with an error. 
191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE); 191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 192150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Successful match at end of string. 192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m("A?", 0, status); // will match zero length string. 192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&input1); 192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(input1Len, status) == TRUE); 192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&empty); 193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(0, status) == TRUE); 193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // lookingAt(pos, status) 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m1->reset(&input2); // "not abc" 194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(m1->lookingAt(5, status) == FALSE); 194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(3, status) == FALSE); 194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(4, status) == TRUE); 194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); 194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); 195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete m1; 195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input1); 195750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input2); 195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Capture Group. 
196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::start(); 196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::end(); 196650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexMatcher::groupCount(); 196750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 197327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */ 197427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_01234567_pat, -1, &status); 197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 197950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 198027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 198127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_0123456789, -1, &status); 1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1983b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 
198650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const int32_t matchStarts[] = {0, 2, 4, 8}; 198750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const int32_t matchEnds[] = {10, 8, 6, 10}; 198850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 198950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<4; i++) { 199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t actualStart = matcher->start(i, status); 199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualStart != matchStarts[i]) { 199327f654740f2a26ad62a5c155af9199af9e69b889claireho errln("RegexTest failure at %s:%d, index %d. Expected %d, got %d\n", 199427f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, i, matchStarts[i], actualStart); 199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t actualEnd = matcher->end(i, status); 199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (actualEnd != matchEnds[i]) { 199927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("RegexTest failure at %s:%d index %d. 
Expected %d, got %d\n", 200027f654740f2a26ad62a5c155af9199af9e69b889claireho __FILE__, __LINE__, i, matchEnds[i], actualEnd); 200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 200550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 2006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 200950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(); 201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); 2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 201250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->lookingAt(status); 201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest; 201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText destText = UTEXT_INITIALIZER; 201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&destText, &dest, &status); 201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 201827f654740f2a26ad62a5c155af9199af9e69b889claireho //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 201927f654740f2a26ad62a5c155af9199af9e69b889claireho // Test shallow-clone API 202027f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t group_len; 202127f654740f2a26ad62a5c155af9199af9e69b889claireho result = matcher->group((UText *)NULL, 
group_len, status); 202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 202327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 202527f654740f2a26ad62a5c155af9199af9e69b889claireho result = matcher->group(0, &destText, group_len, status); 202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 202750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 202827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 202927f654740f2a26ad62a5c155af9199af9e69b889claireho // destText is now immutable, reopen it 203027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_close(&destText); 203127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUnicodeString(&destText, &dest, &status); 203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(0, NULL, status); 203450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 203527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 203650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 203750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(0, &destText, status); 203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 204027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 204150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(1, NULL, status); 204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 204427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_234567[] = { 0x32, 0x33, 0x34, 
0x35, 0x36, 0x37, 0x00 }; /* 234567 */ 204527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 204650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(1, &destText, status); 204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 205027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 205150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(2, NULL, status); 205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 205427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ 205527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_45, result); 205650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(2, &destText, status); 205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 206027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_45, result); 206150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(3, NULL, status); 206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 206427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ 206527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_89, result); 206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->group(3, 
&destText, status); 206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 207027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_89, result); 2071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); 207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(); 207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 2076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&destText); 208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find 2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 209327f654740f2a26ad62a5c155af9199af9e69b889claireho const char 
str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 209427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_abc, -1, &status); 2095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 209927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ 210027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abcabcabc, -1, &status); 210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 210850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 6); 210950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 12); 211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find() == FALSE); 211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find() == FALSE); 2113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
matcher->reset(); 211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 2117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(0, status)); 211950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(1, status)); 212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 1); 212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(2, status)); 212350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 6); 212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(12, status)); 212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 12); 212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(13, status) == FALSE); 212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(16, status) == FALSE); 212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find(17, status) == FALSE); 212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE); 2130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 2135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
213650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->groupCount() == 0); 213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find, with \G in pattern (true if at the end of a previous match). 2148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 214950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 215250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 215350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 215427f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */ 215527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_Gabcabc, -1, &status); 2156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText 
input = UTEXT_INITIALIZER; 216127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ 216227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abcabcabc, -1, &status); 216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 2164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 0); 216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(1, status) == -1); 217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(2, status) == 1); 2171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->find()); 217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(status) == 4); 217450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(1, status) == 4); 217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(matcher->start(2, status) == -1); 217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 217750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
2184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find with zero length matches, match position should bump ahead 218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to prevent loops. 2188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 219050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 219150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m("(?= ?)", 0, status); // This pattern will zero-length matches anywhere, 219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // using an always-true look-ahead. 219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText s = UTEXT_INITIALIZER; 219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, " ", -1, &status); 219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 219950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == i); 220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(i==5); 220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 220750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check that the bump goes over characters outside the BMP OK 220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in 
UTF-8 220950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00}; 221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, (char *)aboveBMP, -1, &status); 221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 221227f654740f2a26ad62a5c155af9199af9e69b889claireho for (i=0; ; i+=4) { 221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == i); 2218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 221927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(i==20); 222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&s); 222250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 222350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 222450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find() loop breaking test. 222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // with pattern of /.?/, should see a series of one char matches, then a single 222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match of zero length at the end of the input string. 
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(".?", 0, status); 223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText s = UTEXT_INITIALIZER; 223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&s, " ", -1, &status); 223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&s); 223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m.find() == FALSE) { 223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == i); 223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(i==5); 224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&s); 2244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matchers with no input string behave as if they had an empty input string. 
2249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 225250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(".?", 0, status); 225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.find()); 225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status) == 0); 225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.input() == ""); 2258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *p = RegexPattern::compile(".", 0, status); 226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *m = p->matcher(status); 226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m->find() == FALSE); 226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); 226750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete m; 226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete p; 2269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regions 227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 
227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testPattern = UTEXT_INITIALIZER; 227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testText = UTEXT_INITIALIZER; 227827f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); 227927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testPattern); 228027f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status); 228127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testText); 228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(&testPattern, &testText, 0, status); 228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.region(2,4, status); 229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.matches(status)); 229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.start(status)==2); 229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.end(status)==4); 229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(); 229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 
229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230127f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&testText, "short", -1, &status); 230227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&testText); 230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&testText); 230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionStart() == 0); 230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); 230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 230850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 
232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 2323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(&m == &m.reset()); 232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 232950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testText); 233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testPattern); 2331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // hitEnd() and requireEnd() 233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testPattern = UTEXT_INITIALIZER; 233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText testText = UTEXT_INITIALIZER; 234027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 234127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ 234227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_, -1, &status); 234327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testText, str_aabb, -1, &status); 234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m1(&testPattern, &testText, 0, status); 234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.lookingAt(status) == TRUE); 234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.hitEnd() == TRUE); 234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m1.requireEnd() == FALSE); 234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 235227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ 235327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_a, -1, &status); 235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m2(&testPattern, &testText, 0, status); 235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.lookingAt(status) == TRUE); 235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.hitEnd() == FALSE); 235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m2.requireEnd() == FALSE); 235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 236127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ 236227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); 236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m3(&testPattern, &testText, 0, status); 236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m3.lookingAt(status) == TRUE); 236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(m3.hitEnd() == TRUE); 236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(m3.requireEnd() == TRUE); 236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testText); 237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&testPattern); 2371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 2376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Replace_UTF8 API test for class RegexMatcher, testing the 237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Replace family of functions. 2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 238150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Replace_UTF8() { 238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replace 238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags=0; 238650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re=UTEXT_INITIALIZER; 239027f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 239127f654740f2a26ad62a5c155af9199af9e69b889claireho 
REGEX_VERBOSE_TEXT(&re); 239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 239527f654740f2a26ad62a5c155af9199af9e69b889claireho char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ 239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567 239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText dataText = UTEXT_INITIALIZER; 239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, data, -1, &status); 239927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 240027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&dataText); 2401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); 2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain vanilla matches. 
240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest; 240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText destText = UTEXT_INITIALIZER; 240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&destText, &dest, &status); 240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 241250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ 241427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_yz, -1, &status); 241527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_VERBOSE_TEXT(&replText); 241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 241827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. 
*/ 241927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 242427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 242827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */ 242927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 243627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain vanilla non-matches. 
244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 244127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */ 244227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abxabxabx, -1, &status); 244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 244727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 244850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 244950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 245050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 245150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 245227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 245350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 245450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 245550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 245627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 245850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 245950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 246050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
246150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 246227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 246550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Empty source string 246650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 246750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, NULL, 0, &status); 246850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 246950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 247227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 247727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 247850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 248127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 248627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", result); 248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Empty substitution string 249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." 249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&replText, NULL, 0, &status); 249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 249727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. 
*/ 249827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 250327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 250727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */ 250827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 251427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_dots, result); 251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match whole string 251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 251927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 
252027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc, -1, &status); 252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 252327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */ 252427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_xyz, -1, &status); 252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, NULL, status); 252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 252727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceFirst(&replText, &destText, status); 253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 253327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 253727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 
254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 254327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); 254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Capture Group, simple case 254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 254827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ 254927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_add, -1, &status); 255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); 255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 255227f654740f2a26ad62a5c155af9199af9e69b889claireho 255327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */ 255427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abcdefg, -1, &status); 2555b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); 255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 255827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ 255927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_11, -1, &status); 256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 256227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 
0x66, 0x67, 0x00 }; /* bcbcdefg */ 256327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 256450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 256927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 2570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 2571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. 
*/ 2572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho utext_openUTF8(&replText, str_v, -1, &status); 2573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_VERBOSE_TEXT(&replText); 257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 257627f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */ 257727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 258327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 258527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */ 258627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); 258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 
258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 258927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ 259027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 259627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 259827f654740f2a26ad62a5c155af9199af9e69b889claireho unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. 
*/ 259927f654740f2a26ad62a5c155af9199af9e69b889claireho //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE 260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 012345678901234567890123456 260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[22] = 0xF0; 260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[23] = 0x9D; 260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[24] = 0x9F; 260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho supplDigitChars[25] = 0x8F; 260550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); 260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, NULL, status); 260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 260927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */ 261027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher2->replaceFirst(&replText, &destText, status); 261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 261627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 
261727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */ 261827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); 261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR); 262027f654740f2a26ad62a5c155af9199af9e69b889claireho// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR); 262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 262527f654740f2a26ad62a5c155af9199af9e69b889claireho// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replacement String with \u hex escapes 262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 263127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */ 263227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */ 
263327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); 263427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_u0043, -1, &status); 263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 263927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ 264027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 264627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 264927f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! 
*/ 265027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_abc, -1, &status); 265127f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ 265227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_U00010000, -1, &status); 265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->reset(&dataText); 265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 265527f654740f2a26ad62a5c155af9199af9e69b889claireho unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A" 265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 0123456789 265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[2] = 0xF0; 265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[3] = 0x90; 265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[4] = 0x80; 266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected[5] = 0x80; 266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, NULL, status); 266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 266427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(result); 266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); 266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = matcher->replaceAll(&replText, &destText, status); 266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &destText); 267027f654740f2a26ad62a5c155af9199af9e69b889claireho 
REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: need more thorough testing of capture substitutions. 2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Bug 4057 267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 267650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 267750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 267827f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */ 267927f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */ 268027f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ 268127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re, str_ssee, -1, &status); 268227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&dataText, str_blah, -1, &status); 268327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&replText, str_ooh, -1, &status); 268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(&re, 0, status); 268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result;
268950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Multiple finds do NOT bump up the previous appendReplacement position. 269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(&dataText); 269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 269827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 269927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText); 2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // After a reset into the interior of a string, appendReplacement still starts at beginning.
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.truncate(0); 270450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(10, status); 270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 271027f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 271127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText); 2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // find() at interior of string, appendReplacement still starts at beginning. 
271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.truncate(0); 271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.reset(); 271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(10, status); 271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.find(); 272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho m.appendReplacement(&resultText, &replText, status); 272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 272227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 272327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); 2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272527f654740f2a26ad62a5c155af9199af9e69b889claireho m.appendTail(&resultText, status); 272627f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ 272727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); 272850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 272950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 
273050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 273250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher2; 273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&dataText); 273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&destText); 274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re); 2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// API_Pattern_UTF8 Test that the API for class RegexPattern is 274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// present and nominally working. 274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 275050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Pattern_UTF8() { 275150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern pata; // Test default constructor to not crash. 
275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern patb; 275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pata == patb); 275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pata == pata); 275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re1 = UTEXT_INITIALIZER; 275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText re2 = UTEXT_INITIALIZER; 275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ 276327f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ 276427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re1, str_abcalmz, -1, &status); 276527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re2, str_def, -1, &status); 276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); 276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); 276950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1 == *pat1); 277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1 != pata); 277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Assign 277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patb = *pat1; 277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb 
== *pat1); 277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy Construct 277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern patc(*pat1); 277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patc == *pat1); 278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb == patc); 278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1 != pat2); 278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patb = *pat2; 278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb != patc); 278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(patb == *pat2); 278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile with no flags. 278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1a = RegexPattern::compile(&re1, pe, status); 278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1a == *pat1); 278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1a->flags() == 0); 279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile with different flags should be not equal 279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status); 279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1b != *pat1a); 279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE); 279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1a->flags() == 0); 279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1b; 280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // clone 280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1c = pat1->clone(); 280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1c == *pat1); 280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(*pat1c != *pat2); 280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1c; 280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1a; 280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat2; 281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re1); 281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re2); 281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Verify that a matcher created from a cloned pattern works. 
281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Jitterbug 3423) 281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 282227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */ 282327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_pL, -1, &status); 282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); 282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pClone = pSource->clone(); 282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pSource; 282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *mFromClone = pClone->matcher(status); 282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 283227f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ 283327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_HelloWorld, -1, &status); 283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mFromClone->reset(&input); 283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == TRUE); 283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->group(status) == "Hello"); 283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == TRUE); 283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->group(status) == 
"World"); 283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(mFromClone->find() == FALSE); 284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete mFromClone; 284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pClone; 284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matches convenience API 284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText pattern = UTEXT_INITIALIZER; 285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 285527f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ 285627f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_randominput, -1, &status); 285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 285827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 285927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_dotstar, -1, &status); 286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE); 286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 286250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 286327f654740f2a26ad62a5c155af9199af9e69b889claireho const char 
str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 286427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_abc, -1, &status); 286550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 286827f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ 286927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_nput, -1, &status); 287050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 287327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_randominput, -1, &status); 287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); 287550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 287727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ 287827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_u, -1, &status); 287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&input, str_abc, -1, &status); 288327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&pattern, str_abc, -1, &status); 
288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); 288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&pattern); 289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Split() 289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 289727f654740f2a26ad62a5c155af9199af9e69b889claireho const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ 289827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&re1, str_spaceplus, -1, &status); 289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString fields[10]; 290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t n; 290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 10, status); 290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==4); 290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now"); 290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="is"); 290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
REGEX_ASSERT(fields[2]=="the"); 291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="time"); 291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]==""); 291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 2, status); 291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==2); 291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now"); 291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="is the time"); 291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="the"); // left over from previous test 291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[1] = "*"; 292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("Now is the time", fields, 1, status); 292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==1); 292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="Now is the time"); 292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="*"); 292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" Now is the time ", fields, 10, status); 293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2931b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==6); 293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="Now"); 
293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="is"); 293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="the"); 293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="time"); 293750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]==""); 2938b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[6]==""); 293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2940b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[2] = "*"; 294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" ", fields, 10, status); 294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2943b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==2); 294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 2945b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[1]==""); 2946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[2]=="*"); 294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[0] = "foo"; 294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("", fields, 10, status); 295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==0); 295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="foo"); 295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // split, with a pattern with (capture) 295727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status); 295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 
295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 2962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[6] = fields[7] = "*"; 296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status); 296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==""); 296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="c"); 297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]==""); 2973b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[7]=="*"); 297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status==U_ZERO_ERROR); 297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fields[6] = fields[7] = "*"; 297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status); 297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 2979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(n==7); 298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 
298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="c"); 298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]==""); 2987b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[7]=="*"); 298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[6] = "foo"; 2991b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho n = pat1->split(" <a>Now is <b>the time<c> ", fields, 6, status); 299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==6); 299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 2999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(fields[5]==" "); 300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[6]=="foo"); 300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[5] = "foo"; 300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status); 300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 
300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time<c>"); 301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="foo"); 301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[5] = "foo"; 301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time", fields, 5, status); 301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="b"); 302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="the time"); 302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[5]=="foo"); 302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status); 302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==4); 303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]==" "); 
303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="a"); 303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="Now is "); 303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]=="the time<c>"); 303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 303727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status); 303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n = pat1->split("1-10,20", fields, 10, status); 304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(n==5); 304350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[0]=="1"); 304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[1]=="-"); 304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[2]=="10"); 304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[3]==","); 304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(fields[4]=="20"); 304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // RegexPattern::pattern() and patternText() 305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = new RegexPattern(); 305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(pat1->pattern() == ""); 
305627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status)); 305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 3058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *helloWorldInvariant = "(Hello, world)*"; 3059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status); 306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat1 = RegexPattern::compile(&re1, pe, status); 306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 3062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*"); 306327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status)); 306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&re1); 306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Extended A more thorough check for features of regex patterns 307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The test cases are in a separate data file, 307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// source/tests/testdata/regextst.txt 307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// A description of the test data format is included in that file. 
307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst char * 308050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexTest::getPath(char buffer[2048], const char *filename) { 308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *testDataDirectory = IntlTest::getSourceTestData(status); 308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ERROR: loadTestData() failed - %s", u_errorName(status)); 308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strcpy(buffer, testDataDirectory); 308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strcat(buffer, filename); 309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer; 309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Extended() { 309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char tdd[2048]; 309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *srcPath; 309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lineNum = 0; 309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open and read the test data file. 
310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcPath=getPath(tdd, "regextst.txt"); 310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcPath==NULL) { 310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len; 310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status); 310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the test data into a UnicodeString 311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testString(FALSE, testData, len); 311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status); 311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status); 312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status); 312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status); 312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testPattern; // The pattern 
for test from the test file. 312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testFlags; // the flags for a test. 312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString matchString; // The marked up string to be used as input 312650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)){ 312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Construct RegexMatcher() error."); 312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop over the test data file, once per line. 313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (lineMat.find()) { 313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum++; 313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 313927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status)); 314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testLine = lineMat.group(1, status); 314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (testLine.length() == 0) { 314550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Parse the test line. Skip blank and comment only lines. 
315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Separate out the three main fields - pattern, flags, target. 315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho commentMat.reset(testLine); 315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (commentMat.lookingAt(status)) { 315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This line is a comment, or blank. 315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the pattern field, remove it from the test file line. 316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quotedStuffMat.reset(testLine); 316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (quotedStuffMat.lookingAt(status)) { 316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testPattern = quotedStuffMat.group(2, status); 316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, quotedStuffMat.end(0, status)); 316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 316727f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum); 316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 317350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the flags from the test file line. 
317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagsMat.reset(testLine); 317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagsMat.lookingAt(status); // Will always match, possibly an empty string. 317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testFlags = flagsMat.group(1, status); 317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagsMat.group(2, status).length() > 0) { 317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Bad Match flag at line %d. Scanning %c\n", 318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum, flagsMat.group(2, status).charAt(0)); 318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, flagsMat.end(0, status)); 318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pull out the match string, as a whole. 318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We'll process the <tags> later. 
318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quotedStuffMat.reset(testLine); 319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (quotedStuffMat.lookingAt(status)) { 319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString = quotedStuffMat.group(2, status); 319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testLine.remove(0, quotedStuffMat.end(0, status)); 319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Bad match string at test file line %d", lineNum); 319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The only thing left from the input line should be an optional trailing comment. 320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho commentMat.reset(testLine); 320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (commentMat.lookingAt(status) == FALSE) { 320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: unexpected characters at end of test line.", lineNum); 320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Run the test 320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 321027f654740f2a26ad62a5c155af9199af9e69b889claireho regex_find(testPattern, testFlags, matchString, srcPath, lineNum); 321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 
321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regex_find(pattern, flags, inputString, lineNumber) 322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Function to run a single test from the Extended (data driven) tests. 322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// See file test/testdata/regextst.txt for a description of the 322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pattern and inputString fields, and the allowed flags. 322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// lineNumber is the source line in regextst.txt of the test. 322750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set a value into a UVector at position specified by a decimal number in 323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// a UnicodeString. This is a utility function needed by the actual test function, 323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// which follows. 
323450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void set(UVector &vec, int32_t val, UnicodeString index) { 323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status=U_ZERO_ERROR; 323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t idx = 0; 323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (int32_t i=0; i<index.length(); i++) { 323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t d=u_charDigitValue(index.charAt(i)); 323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (d<0) {return;} 324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho idx = idx*10 + d; 324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (vec.size()<idx+1) {vec.addElement(-1, status);} 324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho vec.setElementAt(val, idx); 324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 324627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void setInt(UVector &vec, int32_t val, int32_t idx) { 324727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status=U_ZERO_ERROR; 324827f654740f2a26ad62a5c155af9199af9e69b889claireho while (vec.size()<idx+1) {vec.addElement(-1, status);} 324927f654740f2a26ad62a5c155af9199af9e69b889claireho vec.setElementAt(val, idx); 325027f654740f2a26ad62a5c155af9199af9e69b889claireho} 325127f654740f2a26ad62a5c155af9199af9e69b889claireho 325227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex) 325327f654740f2a26ad62a5c155af9199af9e69b889claireho{ 325427f654740f2a26ad62a5c155af9199af9e69b889claireho UBool couldFind = TRUE; 325527f654740f2a26ad62a5c155af9199af9e69b889claireho UTEXT_SETNATIVEINDEX(utext, 0); 325627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i = 0; 325727f654740f2a26ad62a5c155af9199af9e69b889claireho while (i < unistrOffset) { 
325827f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c = UTEXT_NEXT32(utext); 325927f654740f2a26ad62a5c155af9199af9e69b889claireho if (c != U_SENTINEL) { 326027f654740f2a26ad62a5c155af9199af9e69b889claireho i += U16_LENGTH(c); 326127f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 326227f654740f2a26ad62a5c155af9199af9e69b889claireho couldFind = FALSE; 326327f654740f2a26ad62a5c155af9199af9e69b889claireho break; 326427f654740f2a26ad62a5c155af9199af9e69b889claireho } 326527f654740f2a26ad62a5c155af9199af9e69b889claireho } 3266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext); 326727f654740f2a26ad62a5c155af9199af9e69b889claireho return couldFind; 326827f654740f2a26ad62a5c155af9199af9e69b889claireho} 326927f654740f2a26ad62a5c155af9199af9e69b889claireho 327027f654740f2a26ad62a5c155af9199af9e69b889claireho 327150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::regex_find(const UnicodeString &pattern, 327250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &flags, 327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &inputString, 327427f654740f2a26ad62a5c155af9199af9e69b889claireho const char *srcPath, 327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t line) { 327650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString unEscapedInput; 327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString deTaggedInput; 327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternUTF8Length, inputUTF8Length; 328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *patternChars = NULL, *inputChars = NULL; 328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UConverter *UTF8Converter = NULL; 
328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *parsePat = NULL; 328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *parseMatcher = NULL; 328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; 329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; 329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector groupStarts(status); 329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector groupEnds(status); 329327f654740f2a26ad62a5c155af9199af9e69b889claireho UVector groupStartsUTF8(status); 329427f654740f2a26ad62a5c155af9199af9e69b889claireho UVector groupEndsUTF8(status); 329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isMatch = FALSE, isUTF8Match = FALSE; 329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool failed = FALSE; 329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numFinds; 329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 329950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool useMatchesFunc = FALSE; 330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool useLookingAtFunc = FALSE; 330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t regionStart = -1; 330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t regionEnd = -1; 330327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t regionStartUTF8 = -1; 330427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t regionEndUTF8 = -1; 330527f654740f2a26ad62a5c155af9199af9e69b889claireho 330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile the caller's pattern 
330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t bflags = 0; 331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag 331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_CASE_INSENSITIVE; 331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag 331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_COMMENTS; 331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag 331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_DOTALL; 331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag 332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_MULTILINE; 332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag 332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; 332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag 332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bflags |= UREGEX_UNIX_LINES; 332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callerPattern = RegexPattern::compile(pattern, bflags, pe, status); 333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 
333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #if UCONFIG_NO_BREAK_ITERATION==1 333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'v' test flag means that the test pattern should not compile if ICU was configured 333650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to not include break iteration. RBBI is needed for Unicode word boundaries. 333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected pattern compilation error. 334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("Pattern Compile returns \"%s\"", u_errorName(status)); 334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unexpected pattern compilation error. 
3349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status)); 335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Converter = ucnv_open("UTF8", &status); 335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); 335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = new char[patternUTF8Length+1]; 336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); 336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); 336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_ZERO_ERROR) { 336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); 336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status != U_ZERO_ERROR) { 336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==1 336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'v' test flag means that the test pattern should not compile if ICU was configured 336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to not include break iteration. RBBI is needed for Unicode word boundaries. 
337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) { 337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected pattern compilation error. 337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status)); 337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unexpected pattern compilation error. 338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: error %s compiling pattern. 
(UTF8)", line, u_errorName(status)); 338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Pattern == NULL) { 338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 339027f654740f2a26ad62a5c155af9199af9e69b889claireho logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); 339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag 339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPatternDump(callerPattern); 339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag 339927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line); 340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Number of times find() should be called on the test string, default to 1 340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numFinds = 1; 
340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=2; i<=9; i++) { 340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag 341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (numFinds != 1) { 341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: more than one digit flag. Scanning %d.", line, i); 341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numFinds = i; 341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 'M' flag. Use matches() instead of find() 341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x4d) >= 0) { 342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho useMatchesFunc = TRUE; 342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x4c) >= 0) { 342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho useLookingAtFunc = TRUE; 342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Find the tags in the input data, remove them, and record the group boundary 342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // positions. 
342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status); 343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unEscapedInput = inputString.unescape(); 343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher = parsePat->matcher(unEscapedInput, status); 343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(parseMatcher->find()) { 343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher->appendReplacement(deTaggedInput, "", status); 343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString groupNum = parseMatcher->group(2, status); 344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum == "r") { 344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // <r> or </r>, a region specification within the string 344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (parseMatcher->group(1, status) == "/") { 344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regionEnd = deTaggedInput.length(); 344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regionStart = deTaggedInput.length(); 344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // <digits> or </digits>, a group match boundary tag. 
344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (parseMatcher->group(1, status) == "/") { 345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho set(groupEnds, deTaggedInput.length(), groupNum); 345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho set(groupStarts, deTaggedInput.length(), groupNum); 345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseMatcher->appendTail(deTaggedInput); 345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line); 345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) { 345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("mismatched <r> tags"); 346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Configure the matcher according to the flags specified with this test. 
346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher = callerPattern->matcher(deTaggedInput, status); 346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag 347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->setTrace(TRUE); 347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Pattern != NULL) { 347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); 347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputChars = new char[inputUTF8Length+1]; 347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status); 347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); 347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_ZERO_ERROR) { 3481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); 348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher == NULL) { 348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 348727f654740f2a26ad62a5c155af9199af9e69b889claireho logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for 
%s:%d", srcPath, line); 348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 349227f654740f2a26ad62a5c155af9199af9e69b889claireho // 349327f654740f2a26ad62a5c155af9199af9e69b889claireho // Generate native indices for UTF8 versions of region and capture group info 349427f654740f2a26ad62a5c155af9199af9e69b889claireho // 349527f654740f2a26ad62a5c155af9199af9e69b889claireho if (UTF8Matcher != NULL) { 349627f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8); 349727f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8); 349827f654740f2a26ad62a5c155af9199af9e69b889claireho 349927f654740f2a26ad62a5c155af9199af9e69b889claireho // Fill out the native index UVector info. 350027f654740f2a26ad62a5c155af9199af9e69b889claireho // Only need 1 loop, from above we know groupStarts.size() = groupEnds.size() 350127f654740f2a26ad62a5c155af9199af9e69b889claireho for (i=0; i<groupStarts.size(); i++) { 350227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t start = groupStarts.elementAti(i); 350327f654740f2a26ad62a5c155af9199af9e69b889claireho // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 350427f654740f2a26ad62a5c155af9199af9e69b889claireho if (start >= 0) { 350527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t startUTF8; 350627f654740f2a26ad62a5c155af9199af9e69b889claireho if (!utextOffsetToNative(&inputText, start, startUTF8)) { 350727f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Error at line %d: could not find native index for group start %d. 
UTF16 index %d", line, i, start); 350827f654740f2a26ad62a5c155af9199af9e69b889claireho failed = TRUE; 350927f654740f2a26ad62a5c155af9199af9e69b889claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 351027f654740f2a26ad62a5c155af9199af9e69b889claireho } 351127f654740f2a26ad62a5c155af9199af9e69b889claireho setInt(groupStartsUTF8, startUTF8, i); 351227f654740f2a26ad62a5c155af9199af9e69b889claireho } 351327f654740f2a26ad62a5c155af9199af9e69b889claireho 351427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t end = groupEnds.elementAti(i); 351527f654740f2a26ad62a5c155af9199af9e69b889claireho // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting 351627f654740f2a26ad62a5c155af9199af9e69b889claireho if (end >= 0) { 351727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t endUTF8; 351827f654740f2a26ad62a5c155af9199af9e69b889claireho if (!utextOffsetToNative(&inputText, end, endUTF8)) { 351927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Error at line %d: could not find native index for group end %d. UTF16 index %d", line, i, end); 352027f654740f2a26ad62a5c155af9199af9e69b889claireho failed = TRUE; 352127f654740f2a26ad62a5c155af9199af9e69b889claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
352227f654740f2a26ad62a5c155af9199af9e69b889claireho } 352327f654740f2a26ad62a5c155af9199af9e69b889claireho setInt(groupEndsUTF8, endUTF8, i); 352427f654740f2a26ad62a5c155af9199af9e69b889claireho } 352527f654740f2a26ad62a5c155af9199af9e69b889claireho } 352627f654740f2a26ad62a5c155af9199af9e69b889claireho } 352727f654740f2a26ad62a5c155af9199af9e69b889claireho 352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regionStart>=0) { 352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->region(regionStart, regionEnd, status); 353050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 353227f654740f2a26ad62a5c155af9199af9e69b889claireho UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status); 353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x61) >= 0) { // 'a' anchoring bounds flag 353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->useAnchoringBounds(FALSE); 353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->useAnchoringBounds(FALSE); 354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag 354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->useTransparentBounds(TRUE); 354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->useTransparentBounds(TRUE); 354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Do a find on the de-tagged input using the caller's pattern 355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: error on count>1 and not find(). 355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // error on both matches() and lookingAt(). 355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<numFinds; i++) { 355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (useMatchesFunc) { 355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->matches(status); 355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->matches(status); 356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (useLookingAtFunc) { 356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->lookingAt(status); 356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->lookingAt(status); 356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = matcher->find(); 356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTF8Matcher != NULL) { 357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isUTF8Match = UTF8Matcher->find(); 357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
matcher->setTrace(FALSE); 357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match up the groups from the find() with the groups from the tags 357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // number of tags should match number of groups from find operation. 358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher->groupCount does not include group 0, the entire match, hence the +1. 358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // G option in test means that capture group data is not available in the 358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // expected results, so the check needs to be suppressed. 358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch == FALSE && groupStarts.size() != 0) { 3585b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dataerrln("Error at line %d: Match expected, but none found.", line); 358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) { 358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Match expected, but none found. (UTF8)", line); 359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flags.indexOf((UChar)0x47 /*G*/) >= 0) { 359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only check for match / no match. Don't check capture groups. 
359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch && groupStarts.size() == 0) { 359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: No match expected, but one found.", line); 359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) { 360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: No match expected, but one found. (UTF8)", line); 360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; 360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS_L(line); 360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=matcher->groupCount(); i++) { 360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i)); 360927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i)); 361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matcher->start(i, status) != expectedStart) { 361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d", 361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, i, expectedStart, matcher->start(i, status)); 361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 
361527f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) { 361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect start position for group %d. Expected %d, got %d (UTF8)", 361727f654740f2a26ad62a5c155af9199af9e69b889claireho line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); 361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i)); 362327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i)); 362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matcher->end(i, status) != expectedEnd) { 362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect end position for group %d. Expected %d, got %d", 362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, i, expectedEnd, matcher->end(i, status)); 362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Error on end position; keep going; real error is probably yet to come as group 362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end positions work from end of the input data towards the front. 363027f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) { 363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: incorrect end position for group %d. 
Expected %d, got %d (UTF8)", 363227f654740f2a26ad62a5c155af9199af9e69b889claireho line, i, expectedEndUTF8, UTF8Matcher->end(i, status)); 363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Error on end position; keep going; real error is probably yet to come as group 363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end positions work from end of the input data towards the front. 363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( matcher->groupCount()+1 < groupStarts.size()) { 363950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Expected %d capture groups, found %d.", 364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, groupStarts.size()-1, matcher->groupCount()); 364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) { 364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: Expected %d capture groups, found %d. (UTF8)", 364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line, groupStarts.size()-1, UTF8Matcher->groupCount()); 364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->requireEnd() == TRUE) { 365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned TRUE. 
Expected FALSE", line); 365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->requireEnd() == TRUE) { 365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line); 365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true 366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->requireEnd() == FALSE) { 366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", line); 366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false 366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->requireEnd() == FALSE) { 366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line); 366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->hitEnd() == TRUE) { 367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned TRUE. 
Expected FALSE", line); 367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->hitEnd() == TRUE) { 367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)", line); 367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matcher->hitEnd() == FALSE) { 368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line); 368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTF8Matcher->hitEnd() == FALSE) { 368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error at line %d: hitEnd() returned FALSE. 
Expected TRUE (UTF8)", line); 368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho failed = TRUE; 368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 369050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanupAndReturn: 369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (failed) { 369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " 369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); 369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // callerPattern->dump(); 369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete parseMatcher; 369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete parsePat; 369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete UTF8Matcher; 369950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete UTF8Pattern; 370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete callerPattern; 370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] inputChars; 370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] patternChars; 370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_close(UTF8Converter); 370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Errors Check for error handling in patterns. 371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 371850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Errors() { 371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // \escape sequences that aren't implemented yet. 372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED); 372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Missing close parentheses 372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN); 372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN); 372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN); 372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Extra close paren 372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN); 372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN); 373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN); 373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-ahead, Look-behind 373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: add tests for 
unbounded length look-behinds. 373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX); // illegal construct 373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Attempt to use non-default flags 373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = UREGEX_CANON_EQ | 374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_COMMENTS | UREGEX_DOTALL | 374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_MULTILINE; 374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status); 374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED); 374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat1; 374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Quantifiers are allowed only after something that can be quantified. 
375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX); 375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX); 375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX); 375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Mal-formed {min,max} quantifiers 375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL); 375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN); 375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL); 375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL); 375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL); 376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG); 376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG); // Overflows int during scan 376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Overflows regex binary format 376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG); 376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Ticket 5389 376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); 376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Invalid Back Reference \0 376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For ICU 3.8 and earlier 377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For ICU 
versions newer than 3.8, \0 introduces an octal escape. 377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); 377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Read a text data file, convert it to UChars, and return the data 378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in one big UChar * buffer, which the caller must delete. 378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 378350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, 378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *defEncoding, UErrorCode &status) { 378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *retPtr = NULL; 378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *fileBuf = NULL; 378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UConverter* conv = NULL; 378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FILE *f = NULL; 378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = 0; 379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retPtr; 379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open the file. 379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho f = fopen(fileName, "rb"); 379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (f == 0) { 380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error opening test data file %s\n", fileName); 380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_FILE_ACCESS_ERROR; 380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Read it in 380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fileSize; 380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t amt_read; 380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fseek( f, 0, SEEK_END); 381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize = ftell(f); 381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBuf = new char[fileSize]; 381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fseek(f, 0, SEEK_SET); 381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho amt_read = fread(fileBuf, 1, fileSize, f); 381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (amt_read != fileSize || fileSize <= 0) { 381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Error reading test data file."); 381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanUpAndReturn; 381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look for a Unicode Signature (BOM) on the data just read 382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t signatureLength; 382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char * fileBufC; 382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char* encoding; 382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC = fileBuf; 382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho encoding = ucnv_detectUnicodeSignature( 382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBuf, fileSize, &signatureLength, &status); 383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(encoding!=NULL ){ 383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC += signatureLength; 383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize -= signatureLength; 383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho encoding = defEncoding; 383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (strcmp(encoding, "utf-8") == 0) { 383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("file %s is missing its BOM", fileName); 383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open a converter to take the rule file to UTF-16 384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho conv = ucnv_open(encoding, &status); 384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto cleanUpAndReturn; 384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Convert the rules to UChar. 
385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Preflight first to determine required buffer size. 385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = ucnv_toUChars(conv, 385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho NULL, // dest, 385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0, // destCapacity, 385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC, 385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize, 385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status); 385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Buffer Overflow is expected from the preflight operation. 386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr = new UChar[ulen+1]; 386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_toUChars(conv, 386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr, // dest, 386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen+1, 386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileBufC, 386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fileSize, 386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status); 386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 387150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanUpAndReturn: 387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fclose(f); 387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] fileBuf; 387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_close(conv); 387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 
3877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete []retPtr; 387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retPtr = 0; 387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ulen = 0; 388050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retPtr; 388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 388650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PerlTests - Run Perl's regular expression tests 388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The input file for this test is re_tests, the standard regular 388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// expression test data distributed with the Perl source code. 389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Here is Perl's description of the test data file: 389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # The tests are in a separate file 't/op/re_tests'. 389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Each line in that file is a separate test. 389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # There are five columns, separated by tabs. 389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 1 contains the pattern, optionally enclosed in C<''>. 389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Modifiers can be put after the closing C<'>. 389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 2 contains the string to be matched. 
390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 3 contains the expected result: 390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # y expect a match 390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # n expect no match 390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # c expect an error 390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # B test exposes a known bug in Perl, should be skipped 390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # b test exposes a known bug in Perl, should be skipped if noamp 390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>. 391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 4 contains a string, usually C<$&>. 391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 5 contains the expected result of double-quote 391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # interpolating that string after the match, or start of error message. 391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # Column 6, if present, contains a reason why the test is skipped. 391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # This is printed with "skipped", for harness to pick up. 391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # \n in the tests are interpolated, as are variables of the form ${\w+}. 
392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # 392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # If you want to add a regular expression test that can't be expressed 392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// # in this format, don't add it here: put it in op/pat.t instead. 392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// For ICU, if field 3 contains an 'i', the test will be skipped. 392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The test exposes is some known incompatibility between ICU and Perl regexps. 392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (The i is in addition to whatever was there before.) 392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 392950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTests() { 393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char tdd[2048]; 393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *srcPath; 393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Open and read the test data file. 
393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcPath=getPath(tdd, "re_tests.txt"); 393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcPath==NULL) { 394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len; 394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; /* something went wrong, error already output */ 394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 394850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the test data into a UnicodeString 395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString testDataString(FALSE, testData, len); 395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to break the input file into lines, and strip the new lines. 395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // One line per match, capture group one is the desired data. 
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("RegexPattern::compile() error"); 396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher* lineMat = linePat->matcher(testDataString, status); 396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to split a test file line into fields. 396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There are six fields, separated by tabs. 396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Regex to identify test patterns with flag settings, and to separate them. 397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test patterns with flags look like 'pattern'i 397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test patterns without flags are not quoted: pattern 397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher* flagMat = flagPat->matcher(status); 397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The Perl tests reference several perl-isms, which are evaluated/substituted 398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in the test data. Not being perl, this must be done explicitly. Here 398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // are string constants and REs for these constructs. 398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString nulnulSrc("${nulnul}"); 398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nulnul = nulnul.unescape(); 398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString ffffSrc("${ffff}"); 399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString ffff("\\uffff", -1, US_INV); 399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ffff = ffff.unescape(); 399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // regexp for $-[0], $+[2], etc. 399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *groupsMat = groupsPat->matcher(status); 399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // regexp for $0, $1, $2, etc. 
399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *cgMat = cgPat->matcher(status); 400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Main Loop for the Perl Tests, runs once per line from the 400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // test data file. 400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lineNum = 0; 400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t skippedUnimplementedCount = 0; 400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (lineMat->find()) { 400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum++; 401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Get a line, break it into its fields, do the Perl 401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // variable substitutions. 
401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString line = lineMat->group(1, status); 401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString fields[7]; 401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fieldPat->split(line, fields, 7, status); 401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagMat->reset(fields[0]); 402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flagMat->matches(status); 402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern = flagMat->group(2, status); 402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace("${bang}", "!"); 402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.findAndReplace(ffffSrc, ffff); 402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Identify patterns that include match flag settings, 402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // split off the flags, remove the extra quotes. 402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString flagStr = flagMat->group(3, status); 403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t flags = 0; 403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_c = 0x63; // Char constants for the flag letters. 
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_m = 0x6d; 403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_x = 0x78; 404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar UChar_y = 0x79; 404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_i) != -1) { 404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_CASE_INSENSITIVE; 404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_m) != -1) { 404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_MULTILINE; 404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (flagStr.indexOf(UChar_x) != -1) { 404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho flags |= UREGEX_COMMENTS; 404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile the test pattern. 405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status); 405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_REGEX_UNIMPLEMENTED) { 405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test of a feature that is planned for ICU, but not yet implemented. 405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip the test. 
406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho skippedUnimplementedCount++; 406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Some tests are supposed to generate errors. 406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only report an error for tests that are supposed to succeed. 406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_i) >= 0) { 408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ICU should skip this test. 
408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_c) >= 0) { 408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This pattern should have caused a compilation error, but didn't/ 408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: Expected a pattern compile error, got success.", lineNum); 408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // replace the Perl variables that appear in some of the 409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match data strings. 409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString matchString = fields[1]; 409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(nulnulSrc, nulnul); 409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(ffffSrc, ffff); 409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replace any \n in the match string with an actual new-line char. 410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Don't do full unescape, as this unescapes more than Perl does, which 410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // causes other spurious failures in the tests. 
410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Run the test, check for expected match/don't match result. 410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *testMat = testPat->matcher(matchString, status); 411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool found = testMat->find(); 411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool expected = FALSE; 411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fields[2].indexOf(UChar_y) >=0) { 411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expected = TRUE; 411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (expected != found) { 411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("line %d: Expected %smatch, got %smatch", 411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lineNum, expected?"":"no ", found?"":"no " ); 411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Don't try to check expected results if there is no match. 
412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Some have stuff in the expected fields) 412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!found) { 412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testMat; 412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Interpret the Perl expression from the fourth field of the data file, 413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // building up an ICU string from the results of the ICU match. 413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The Perl expression will contain references to the results of 413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a regex match, including the matched string, capture group strings, 413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // group starting and ending indicies, etc. 413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString perlExpr = fields[3]; 413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if SUPPORT_MUTATING_INPUT_STRING 414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (perlExpr.length() > 0) { 414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !SUPPORT_MUTATING_INPUT_STRING 414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Perferred usage. Reset after any modification to input string. 
414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (perlExpr.startsWith("$&")) { 415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(testMat->group(status)); 415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (groupsMat->lookingAt(status)) { 415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // $-[0] $+[2] etc. 415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString digitString = groupsMat->group(2, status); 415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t t = 0; 416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString plusOrMinus = groupsMat->group(1, status); 416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t matchPosition; 416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (plusOrMinus.compare("+") == 0) { 416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchPosition = testMat->end(groupNum, status); 416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchPosition = testMat->start(groupNum, status); 416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (matchPosition != -1) { 416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, matchPosition); 417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
perlExpr.remove(0, groupsMat->end(status)); 417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (cgMat->lookingAt(status)) { 417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // $1, $2, $3, etc. 417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString digitString = cgMat->group(1, status); 417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t t = 0; 417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(testMat->group(groupNum, status)); 418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, cgMat->end(status)); 418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (perlExpr.startsWith("@-")) { 418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=testMat->groupCount(); i++) { 418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>0) { 419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(" "); 419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if 
(perlExpr.startsWith("@+")) { 419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<=testMat->groupCount(); i++) { 420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>0) { 420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(" "); 420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 2); 420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or as an escaped sequence (e.g. \n) 421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (perlExpr.length() > 1) { 421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c = perlExpr.charAt(0); 421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (c) { 421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 'n': c = '\n'; break; 421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // add any other escape sequences that show up in the test expected results. 
421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(c); 421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); 422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Any characters from the perl expression that we don't explicitly 422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // recognize before here are assumed to be literals and copied 422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // as-is to the expected results. 422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultString.append(perlExpr.charAt(0)); 422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho perlExpr.remove(0, 1); 422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Expected Results Compare 423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString expectedS(fields[4]); 424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(nulnulSrc, nulnul); 424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(ffffSrc, ffff); 424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 
4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (expectedS.compare(resultString) != 0) { 424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho err("Line %d: Incorrect perl expression results.", lineNum); 424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testMat; 425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete testPat; 4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // All done. Clean up allocated stuff. 
4256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete cgMat; 425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete cgPat; 4259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete groupsMat; 426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete groupsPat; 4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete flagMat; 426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete flagPat; 4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete lineMat; 426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete linePat; 4268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fieldPat; 427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] testData; 427150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------- 4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PerlTestsUTF8 Run Perl's regular expression tests on UTF-8-based UTexts 428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (instead of using UnicodeStrings) to test the alternate engine. 
428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The input file for this test is re_tests, the standard regular 428350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// expression test data distributed with the Perl source code. 428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// See PerlTests() for more information. 4285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------- 428750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTestsUTF8() { 4288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char tdd[2048]; 4289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *srcPath; 4290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status)); 429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 429450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *patternChars = NULL; 429550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternLength; 429650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t patternCapacity = 0; 429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 429850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *inputChars = NULL; 429950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputLength; 430050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t inputCapacity = 0; 430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 
4303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open and read the test data file. 4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcPath=getPath(tdd, "re_tests.txt"); 4308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcPath==NULL) { 4309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 4310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len; 4313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status); 4314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Put the test data into a UnicodeString 4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testDataString(FALSE, testData, len); 4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to break the input file into lines, and strip the new lines. 
4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One line per match, capture group one is the desired data. 4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); 4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dataerrln("RegexPattern::compile() error"); 4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher* lineMat = linePat->matcher(testDataString, status); 4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to split a test file line into fields. 4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There are six fields, separated by tabs. 4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); 4339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex to identify test patterns with flag settings, and to separate them. 
4342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test patterns with flags look like 'pattern'i 4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test patterns without flags are not quoted: pattern 4344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Coming out, capture group 2 is the pattern, capture group 3 is the flags. 4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); 4347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher* flagMat = flagPat->matcher(status); 4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The Perl tests reference several perl-isms, which are evaluated/substituted 4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the test data. Not being perl, this must be done explicitly. Here 4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are string constants and REs for these constructs. 
4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString nulnulSrc("${nulnul}"); 4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); 4356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nulnul = nulnul.unescape(); 4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ffffSrc("${ffff}"); 4359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString ffff("\\uffff", -1, US_INV); 4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ffff = ffff.unescape(); 4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // regexp for $-[0], $+[2], etc. 4363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); 4364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *groupsMat = groupsPat->matcher(status); 4365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // regexp for $0, $1, $2, etc. 
4367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); 4368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *cgMat = cgPat->matcher(status); 4369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main Loop for the Perl Tests, runs once per line from the 4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test data file. 4374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lineNum = 0; 4376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t skippedUnimplementedCount = 0; 4377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (lineMat->find()) { 4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum++; 4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get a line, break it into its fields, do the Perl 4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // variable substitutions. 
4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString line = lineMat->group(1, status); 4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fields[7]; 4386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fieldPat->split(line, fields, 7, status); 4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flagMat->reset(fields[0]); 4389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flagMat->matches(status); 4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern = flagMat->group(2, status); 4391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.findAndReplace("${bang}", "!"); 4392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); 4393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.findAndReplace(ffffSrc, ffff); 4394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Identify patterns that include match flag settings, 4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // split off the flags, remove the extra quotes. 
4398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString flagStr = flagMat->group(3, status); 4400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags = 0; 4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_c = 0x63; // Char constants for the flag letters. 4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_i = 0x69; // (Damn the lack of Unicode support in C) 4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_m = 0x6d; 4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_x = 0x78; 4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar UChar_y = 0x79; 4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_i) != -1) { 4411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_CASE_INSENSITIVE; 4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_m) != -1) { 4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_MULTILINE; 4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (flagStr.indexOf(UChar_x) != -1) { 4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flags |= UREGEX_COMMENTS; 4418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 441950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
442050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 442150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the pattern in a UTF-8 UText 442250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 442350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 442450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 442550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 442650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 442750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] patternChars; 442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternCapacity = patternLength + 1; 442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = new char[patternCapacity]; 443050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status); 443150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 443250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&patternText, patternChars, patternLength, &status); 4433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile the test pattern. 4436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status); 4438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status == U_REGEX_UNIMPLEMENTED) { 4439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test of a feature that is planned for ICU, but not yet implemented. 4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip the test. 
4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru skippedUnimplementedCount++; 4443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Some tests are supposed to generate errors. 4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Only report an error for tests that are supposed to succeed. 4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_c) == -1 && // Compilation is not supposed to fail AND 4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[2].indexOf(UChar_i) == -1) // it's not an accepted ICU incompatibility 4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status)); 4455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_i) >= 0) { 4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ICU should skip this test. 
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_c) >= 0) { 4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This pattern should have caused a compilation error, but didn't/ 4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: Expected a pattern compile error, got success.", lineNum); 4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 447450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // replace the Perl variables that appear in some of the 4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match data strings. 4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString matchString = fields[1]; 4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchString.findAndReplace(nulnulSrc, nulnul); 4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchString.findAndReplace(ffffSrc, ffff); 4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replace any \n in the match string with an actual new-line char. 
4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Don't do full unescape, as this unescapes more than Perl does, which 4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // causes other spurious failures in the tests. 4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 448850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 448950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Put the input in a UTF-8 UText 449050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 449150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 449250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 449350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (status == U_BUFFER_OVERFLOW_ERROR) { 449450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 449550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete[] inputChars; 449650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputCapacity = inputLength + 1; 449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputChars = new char[inputCapacity]; 449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status); 449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUTF8(&inputText, inputChars, inputLength, &status); 4501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the test, check for expected match/don't match result. 
4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText); 4506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool found = testMat->find(); 4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool expected = FALSE; 4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fields[2].indexOf(UChar_y) >=0) { 4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected = TRUE; 4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expected != found) { 4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: Expected %smatch, got %smatch", 4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum, expected?"":"no ", found?"":"no " ); 4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Don't try to check expected results if there is no match. 
4518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (Some have stuff in the expected fields) 4519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!found) { 4520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete testMat; 4521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete testPat; 4522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 4523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Interpret the Perl expression from the fourth field of the data file, 4527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // building up an ICU string from the results of the ICU match. 4528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The Perl expression will contain references to the results of 4529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a regex match, including the matched string, capture group strings, 4530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // group starting and ending indicies, etc. 
4531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString resultString; 4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString perlExpr = fields[3]; 4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (perlExpr.length() > 0) { 453650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupsMat->reset(perlExpr); 453750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cgMat->reset(perlExpr); 453850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (perlExpr.startsWith("$&")) { 4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(testMat->group(status)); 4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (groupsMat->lookingAt(status)) { 4545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // $-[0] $+[2] etc. 
4546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString digitString = groupsMat->group(2, status); 4547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t = 0; 4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString plusOrMinus = groupsMat->group(1, status); 4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchPosition; 4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (plusOrMinus.compare("+") == 0) { 4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchPosition = testMat->end(groupNum, status); 4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchPosition = testMat->start(groupNum, status); 4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchPosition != -1) { 4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, matchPosition); 4558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, groupsMat->end(status)); 4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (cgMat->lookingAt(status)) { 4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // $1, $2, $3, etc. 
4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString digitString = cgMat->group(1, status); 4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t = 0; 4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10); 4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(testMat->group(groupNum, status)); 4569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 4570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, cgMat->end(status)); 4572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (perlExpr.startsWith("@-")) { 4575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 4576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testMat->groupCount(); i++) { 4577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i>0) { 4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(" "); 4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, testMat->start(i, status)); 4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (perlExpr.startsWith("@+")) { 
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testMat->groupCount(); i++) { 4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i>0) { 4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(" "); 4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::appendNumber(resultString, testMat->end(i, status)); 4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 2); 4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. 4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or as an escaped sequence (e.g. \n) 4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (perlExpr.length() > 1) { 4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); // Remove the '\', but only if not last char. 4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = perlExpr.charAt(0); 4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (c) { 4603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'n': c = '\n'; break; 4604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // add any other escape sequences that show up in the test expected results. 
4605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(c); 4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); 4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Any characters from the perl expression that we don't explicitly 4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // recognize before here are assumed to be literals and copied 4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // as-is to the expected results. 4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resultString.append(perlExpr.charAt(0)); 4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru perlExpr.remove(0, 1); 4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); 4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Expected Results Compare 4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString expectedS(fields[4]); 4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
expectedS.findAndReplace(nulnulSrc, nulnul); 4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedS.findAndReplace(ffffSrc, ffff); 4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); 4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedS.compare(resultString) != 0) { 4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err("Line %d: Incorrect perl expression results.", lineNum); 463550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\""); 4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testMat; 4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testPat; 4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All done. Clean up allocated stuff. 
4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cgMat; 4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cgPat; 4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete groupsMat; 4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete groupsPat; 4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete flagMat; 4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete flagPat; 4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete lineMat; 4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete linePat; 4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fieldPat; 4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] testData; 465950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] patternChars; 466450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete [] inputChars; 4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount); 4668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//-------------------------------------------------------------- 4673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 4674b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Bug6149 Verify limits to heap expansion for backtrack stack. 4675b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Use this pattern, 4676b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// "(a?){1,}" 4677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// The zero-length match will repeat forever. 4678b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// (That this goes into a loop is another bug) 4679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 4680b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------- 4681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid RegexTest::Bug6149() { 4682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString pattern("(a?){1,}"); 4683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString s("xyz"); 4684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t flags = 0; 4685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru RegexMatcher matcher(pattern, s, flags, status); 4688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool result = false; 4689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW); 4690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru REGEX_ASSERT(result == FALSE); 
4691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 4692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Callbacks() Test the callback function. 4696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// When set, callbacks occur periodically during matching operations, 4697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// giving the application code the ability to abort the operation 4698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// before it's normal completion. 4699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct callBackContext { 4702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexTest *test; 4703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t maxCalls; 4704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t numCalls; 4705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t lastSteps; 4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;}; 4707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 4708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_BEGIN 4710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic UBool U_CALLCONV 4711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QuerutestCallBackFn(const void *context, int32_t steps) { 4712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru callBackContext *info = (callBackContext *)context; 4713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if 
(info->lastSteps+1 != steps) { 4714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->test->errln("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 4715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->lastSteps = steps; 4717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru info->numCalls++; 4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (info->numCalls < info->maxCalls); 4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 4720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_END 4721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexTest::Callbacks() { 4723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Getter returns NULLs if no callback has been set 4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The variables that the getter will fill in. 4727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Init to non-null values so that the action of the getter can be seen. 
4728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *returnedContext = &returnedContext; 4729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *returnedFn = &testCallBackFn; 4730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher("x", 0, status); 4733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.getMatchCallback(returnedFn, returnedContext, status); 4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedFn == NULL); 4737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedContext == NULL); 4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 4741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Set and Get work 4742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru callBackContext cbInfo = {this, 0, 0, 0}; 4743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *returnedContext; 4744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *returnedFn; 4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 
4747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.setMatchCallback(testCallBackFn, &cbInfo, status); 4749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.getMatchCallback(returnedFn, returnedContext, status); 4751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedFn == testCallBackFn); 4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(returnedContext == &cbInfo); 4754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A short-running match shouldn't invoke the callback 4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(1); 4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString s = "xxx"; 4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)); 4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls == 0); 4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A medium-length match that runs long enough to invoke the 4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // callback, but not so long that the callback aborts it. 
4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(4); 4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = "aaaaaaaaaaaaaaaaaaab"; 4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)==FALSE); 4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_CHECK_STATUS; 4772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls > 0); 4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // A longer running match that the callback function will abort. 4775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 4776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cbInfo.reset(4); 4777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s = "aaaaaaaaaaaaaaaaaaaaaaab"; 4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matcher.reset(s); 4779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(matcher.matches(status)==FALSE); 4780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 4781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REGEX_ASSERT(cbInfo.numCalls == 4); 4782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 4786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 478827f654740f2a26ad62a5c155af9199af9e69b889claireho// 478927f654740f2a26ad62a5c155af9199af9e69b889claireho// 
FindProgressCallbacks() Test the find "progress" callback function. 479027f654740f2a26ad62a5c155af9199af9e69b889claireho// When set, the find progress callback will be invoked during a find operations 479127f654740f2a26ad62a5c155af9199af9e69b889claireho// after each return from a match attempt, giving the application the opportunity 479227f654740f2a26ad62a5c155af9199af9e69b889claireho// to terminate a long-running find operation before it's normal completion. 479327f654740f2a26ad62a5c155af9199af9e69b889claireho// 479427f654740f2a26ad62a5c155af9199af9e69b889claireho 479527f654740f2a26ad62a5c155af9199af9e69b889clairehostruct progressCallBackContext { 479627f654740f2a26ad62a5c155af9199af9e69b889claireho RegexTest *test; 479727f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t lastIndex; 479827f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t maxCalls; 479927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t numCalls; 480027f654740f2a26ad62a5c155af9199af9e69b889claireho void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; 480127f654740f2a26ad62a5c155af9199af9e69b889claireho}; 480227f654740f2a26ad62a5c155af9199af9e69b889claireho 480327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN 480427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV 480527f654740f2a26ad62a5c155af9199af9e69b889clairehotestProgressCallBackFn(const void *context, int64_t matchIndex) { 480627f654740f2a26ad62a5c155af9199af9e69b889claireho progressCallBackContext *info = (progressCallBackContext *)context; 480727f654740f2a26ad62a5c155af9199af9e69b889claireho info->numCalls++; 480827f654740f2a26ad62a5c155af9199af9e69b889claireho info->lastIndex = matchIndex; 480927f654740f2a26ad62a5c155af9199af9e69b889claireho// info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls); 481027f654740f2a26ad62a5c155af9199af9e69b889claireho return (info->numCalls < info->maxCalls); 
481127f654740f2a26ad62a5c155af9199af9e69b889claireho} 481227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END 481327f654740f2a26ad62a5c155af9199af9e69b889claireho 481427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::FindProgressCallbacks() { 481527f654740f2a26ad62a5c155af9199af9e69b889claireho { 481627f654740f2a26ad62a5c155af9199af9e69b889claireho // Getter returns NULLs if no callback has been set 481727f654740f2a26ad62a5c155af9199af9e69b889claireho 481827f654740f2a26ad62a5c155af9199af9e69b889claireho // The variables that the getter will fill in. 481927f654740f2a26ad62a5c155af9199af9e69b889claireho // Init to non-null values so that the action of the getter can be seen. 482027f654740f2a26ad62a5c155af9199af9e69b889claireho const void *returnedContext = &returnedContext; 482127f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; 482227f654740f2a26ad62a5c155af9199af9e69b889claireho 482327f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 482427f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher matcher("x", 0, status); 482527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 482627f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.getFindProgressCallback(returnedFn, returnedContext, status); 482727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 482827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedFn == NULL); 482927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedContext == NULL); 483027f654740f2a26ad62a5c155af9199af9e69b889claireho } 483127f654740f2a26ad62a5c155af9199af9e69b889claireho 483227f654740f2a26ad62a5c155af9199af9e69b889claireho { 483327f654740f2a26ad62a5c155af9199af9e69b889claireho // Set and Get work 483427f654740f2a26ad62a5c155af9199af9e69b889claireho progressCallBackContext cbInfo = {this, 0, 0, 0}; 483527f654740f2a26ad62a5c155af9199af9e69b889claireho 
const void *returnedContext; 483627f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *returnedFn; 483727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 483827f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 483927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 484027f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); 484127f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 484227f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.getFindProgressCallback(returnedFn, returnedContext, status); 484327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 484427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedFn == testProgressCallBackFn); 484527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(returnedContext == &cbInfo); 484627f654740f2a26ad62a5c155af9199af9e69b889claireho 484727f654740f2a26ad62a5c155af9199af9e69b889claireho // A short-running match should NOT invoke the callback. 
484827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 484927f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(100); 485027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s = "abxxx"; 485127f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s); 485227f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 485327f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.setTrace(TRUE); 485427f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 485527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)); 485627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 485727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls == 0); 485827f654740f2a26ad62a5c155af9199af9e69b889claireho 485927f654740f2a26ad62a5c155af9199af9e69b889claireho // A medium running match that causes matcher.find() to invoke our callback for each index. 486027f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 486127f654740f2a26ad62a5c155af9199af9e69b889claireho s = "aaaaaaaaaaaaaaaaaaab"; 486227f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string 486327f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s); 486427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 486527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 486627f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); 486727f654740f2a26ad62a5c155af9199af9e69b889claireho 486827f654740f2a26ad62a5c155af9199af9e69b889claireho // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. 
486927f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 487027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; 487127f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string 487227f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s1); 487327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 487427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 487527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); 487627f654740f2a26ad62a5c155af9199af9e69b889claireho 487727f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0 487827f654740f2a26ad62a5c155af9199af9e69b889claireho // Now a match that will succeed, but after an interruption 487927f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 488027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; 488127f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string 488227f654740f2a26ad62a5c155af9199af9e69b889claireho matcher.reset(s2); 488327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(0, status)==FALSE); 488427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 488527f654740f2a26ad62a5c155af9199af9e69b889claireho // Now retry the match from where left off 488627f654740f2a26ad62a5c155af9199af9e69b889claireho cbInfo.maxCalls = 100; // No callback limit 488727f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); 488827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 488927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 489027f654740f2a26ad62a5c155af9199af9e69b889claireho } 489127f654740f2a26ad62a5c155af9199af9e69b889claireho 
489227f654740f2a26ad62a5c155af9199af9e69b889claireho 489327f654740f2a26ad62a5c155af9199af9e69b889claireho} 489427f654740f2a26ad62a5c155af9199af9e69b889claireho 489527f654740f2a26ad62a5c155af9199af9e69b889claireho 489650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 489750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 489850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// PreAllocatedUTextCAPI Check the C API with pre-allocated mutable 489950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// UTexts. The pure-C implementation of UText 490050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// has no mutable backing stores, but we can 490150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// use UnicodeString here to test the functionality. 490250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 490350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------- 490450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PreAllocatedUTextCAPI () { 490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho URegularExpression *re; 490750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patternText = UTEXT_INITIALIZER; 490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString buffer; 490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText bufferText = UTEXT_INITIALIZER; 491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&bufferText, &buffer, &status); 491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 491350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 491450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * getText() and getUText() 491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 
491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText text1 = UTEXT_INITIALIZER; 491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText text2 = UTEXT_INITIALIZER; 491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2Chars[20]; 492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *resultText; 492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 492327f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); 492427f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); 492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); 492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&text2, text2Chars, -1, &status); 492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 492827f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); 492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openUText(&patternText, 0, NULL, &status); 493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* First set a UText */ 493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setUText(re, &text1, &status); 493350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 493750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text1, 0); 493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 
493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 494050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 494150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 494250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 494350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text1, 0); 494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 494750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Then set a UChar * */ 494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2Chars, 7, &status); 494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resultText = uregex_getUText(re, &bufferText, &status); 495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(resultText == &bufferText); 495250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(resultText, 0); 495350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_setNativeIndex(&text2, 0); 495450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); 495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&text1); 495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&text2); 495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 496050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 496150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * group() 496350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *actual; 496750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool result; 496850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("abc(.*?)def", 0, NULL, &status); 497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 497350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_find(re, 0, &status); 497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result==TRUE); 497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture Group 0, the full match. Should succeed. */ 497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 498027f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 0, &bufferText, &status); 498150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 498250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 498327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual); 498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture group #1. Should succeed. 
*/ 498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 498727f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 1, &bufferText, &status); 498850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 498950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 499027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual); 499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 499250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Capture group out of range. Error. */ 499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 499427f654740f2a26ad62a5c155af9199af9e69b889claireho actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); 499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 499650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(actual == &bufferText); 499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 499950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 500350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replaceFirst() 500450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 500550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2[80]; 500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 501050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 501427f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("x(.*?)x", 0, NULL, &status); 501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Normal case, with match */ 502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 502150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 502350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 502527f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); 502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* No match. Text should copy to output with no changes. 
*/ 502850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2, -1, &status); 502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 503050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 503150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 503250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 503327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 503450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 503550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Unicode escapes */ 503650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 503727f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); 503850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 503950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 504050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 504227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); 504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 504650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 
505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replaceAll() 505150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text1[80]; 505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar text2[80]; 505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replText = UTEXT_INITIALIZER; 505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *result; 505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 506127f654740f2a26ad62a5c155af9199af9e69b889claireho regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re = uregex_openC("x(.*?)x", 0, NULL, &status); 506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Normal case, with match */ 506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text1, -1, &status); 506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceAllUText(re, &replText, &bufferText, &status); 507050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 507227f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result); 507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* No match. Text should copy to output with no changes. */ 507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_setText(re, text2, -1, &status); 507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 507750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = uregex_replaceAllUText(re, &replText, &bufferText, &status); 507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(result == &bufferText); 508027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 508150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 508250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_close(re); 508350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replText); 508450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 508550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 508650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 508750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 508850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts, 508950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * so we don't need to test it here. 
509050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 509250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&bufferText); 509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patternText); 509450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 509550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 509650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------- 509750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 509850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Bug7651 Regex pattern that exceeds default operator stack depth in matcher. 509950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 510050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------- 510150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Bug7651() { 510250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)"); 510350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData. 510427f654740f2a26ad62a5c155af9199af9e69b889claireho // It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation. 
510550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)"); 510650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString s("#ff @abcd This is test"); 510750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *REPattern = NULL; 510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *REMatcher = NULL; 510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 511050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(pattern1, 0, pe, status); 511350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REMatcher = REPattern->matcher(s, status); 511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->find()); 511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->start(status) == 0); 511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REPattern = RegexPattern::compile(pattern2, 0, pe, status); 512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REMatcher = REPattern->matcher(s, status); 512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_CHECK_STATUS; 
512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->find()); 512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_ASSERT(REMatcher->start(status) == 0); 512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REPattern; 512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete REMatcher; 513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 513327f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::Bug7740() { 513427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 513527f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString pattern = "(a)"; 513627f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString text = "abcdef"; 513727f654740f2a26ad62a5c155af9199af9e69b889claireho RegexMatcher *m = new RegexMatcher(pattern, text, 0, status); 513827f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 513927f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(m->lookingAt(status)); 514027f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_CHECK_STATUS; 514127f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ILLEGAL_ARGUMENT_ERROR; 514227f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString s = m->group(1, status); // Bug 7740: segfault here. 514327f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 514427f654740f2a26ad62a5c155af9199af9e69b889claireho REGEX_ASSERT(s == ""); 514527f654740f2a26ad62a5c155af9199af9e69b889claireho delete m; 514627f654740f2a26ad62a5c155af9199af9e69b889claireho} 514727f654740f2a26ad62a5c155af9199af9e69b889claireho 5148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 8479: was crashing whith a Bogus UnicodeString as input. 
5149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug8479() { 5151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode status = U_ZERO_ERROR; 515227f654740f2a26ad62a5c155af9199af9e69b889claireho 5153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status); 5154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) 5156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho { 5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString str; 5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho str.setToBogus(); 5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pMatcher->reset(str); 5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; 5161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pMatcher->matches(status); 5162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pMatcher; 5164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 516627f654740f2a26ad62a5c155af9199af9e69b889claireho 516727f654740f2a26ad62a5c155af9199af9e69b889claireho 5168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 7029 5169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug7029() { 5170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode status = U_ZERO_ERROR; 5171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); 5173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString text = "abc.def"; 5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString splits[10]; 
5175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t numFields = pMatcher->split(text, splits, 10, status); 5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_CHECK_STATUS; 5178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REGEX_ASSERT(numFields == 8); 5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete pMatcher; 5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 5181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::CheckInvBufSize() { 5183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(inv_next>=INV_BUFSIZ) { 5184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n", 5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho __FILE__, INV_BUFSIZ, inv_next); 5186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 5187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); 5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 5189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 5190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 5192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5193