regextst.cpp revision b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/********************************************************************
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT:
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (c) 2002-2011, International Business Machines Corporation and
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   regextst.cpp
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      ICU Regular Expressions test, part of intltest.
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     NOTE!!
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     PLEASE be careful about ASCII assumptions in this test.
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     This test is one of the worst repeat offenders.
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     If you have questions, contact someone on the ICU PMC
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     who has access to an EBCDIC system.
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "intltest.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regextst.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
3627f654740f2a26ad62a5c155af9199af9e69b889claireho#include "cstring.h"
3727f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uinvchar.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define SUPPORT_MUTATING_INPUT_STRING   0
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  Test class boilerplate
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::RegexTest()
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::~RegexTest()
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (exec) logln("TestSuite RegexTest: ");
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (index) {
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0: name = "Basic";
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Basic();
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 1: name = "API_Match";
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Match();
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 2: name = "API_Replace";
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Replace();
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 3: name = "API_Pattern";
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Pattern();
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case 4:
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "Extended";
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Extended();
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "skip";
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 5: name = "Errors";
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Errors();
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 6: name = "PerlTests";
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) PerlTests();
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case 7: name = "Callbacks";
89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) Callbacks();
90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
9127f654740f2a26ad62a5c155af9199af9e69b889claireho        case 8: name = "FindProgressCallbacks";
9227f654740f2a26ad62a5c155af9199af9e69b889claireho            if (exec) FindProgressCallbacks();
9327f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
9427f654740f2a26ad62a5c155af9199af9e69b889claireho        case 9: name = "Bug 6149";
95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             if (exec) Bug6149();
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             break;
9727f654740f2a26ad62a5c155af9199af9e69b889claireho        case 10: name = "UTextBasic";
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) UTextBasic();
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10027f654740f2a26ad62a5c155af9199af9e69b889claireho        case 11: name = "API_Match_UTF8";
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Match_UTF8();
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10327f654740f2a26ad62a5c155af9199af9e69b889claireho        case 12: name = "API_Replace_UTF8";
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Replace_UTF8();
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10627f654740f2a26ad62a5c155af9199af9e69b889claireho        case 13: name = "API_Pattern_UTF8";
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Pattern_UTF8();
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10927f654740f2a26ad62a5c155af9199af9e69b889claireho        case 14: name = "PerlTestsUTF8";
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) PerlTestsUTF8();
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
11227f654740f2a26ad62a5c155af9199af9e69b889claireho        case 15: name = "PreAllocatedUTextCAPI";
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) PreAllocatedUTextCAPI();
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
11527f654740f2a26ad62a5c155af9199af9e69b889claireho        case 16: name = "Bug 7651";
11627f654740f2a26ad62a5c155af9199af9e69b889claireho             if (exec) Bug7651();
11727f654740f2a26ad62a5c155af9199af9e69b889claireho             break;
11827f654740f2a26ad62a5c155af9199af9e69b889claireho        case 17: name = "Bug 7740";
11927f654740f2a26ad62a5c155af9199af9e69b889claireho            if (exec) Bug7740();
12027f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 18: name = "Bug 8479";
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) Bug8479();
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 19: name = "Bug 7029";
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) Bug7029();
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 20: name = "CheckInvBufSize";
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) CheckInvBufSize();
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default: name = "";
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; //needed to end loop
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
13827f654740f2a26ad62a5c155af9199af9e69b889claireho/**
13927f654740f2a26ad62a5c155af9199af9e69b889claireho * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
14027f654740f2a26ad62a5c155af9199af9e69b889claireho * into ASCII.
14127f654740f2a26ad62a5c155af9199af9e69b889claireho * @see utext_openUTF8
14227f654740f2a26ad62a5c155af9199af9e69b889claireho */
14327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
14427f654740f2a26ad62a5c155af9199af9e69b889claireho
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Error Checking / Reporting macros used in all of the tests.
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) {
15227f654740f2a26ad62a5c155af9199af9e69b889claireho  int64_t oldIndex = utext_getNativeIndex(text);
15327f654740f2a26ad62a5c155af9199af9e69b889claireho  utext_setNativeIndex(text, 0);
15427f654740f2a26ad62a5c155af9199af9e69b889claireho  char *bufPtr = buf;
15527f654740f2a26ad62a5c155af9199af9e69b889claireho  UChar32 c = utext_next32From(text, 0);
15627f654740f2a26ad62a5c155af9199af9e69b889claireho  while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) {
15727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (0x000020<=c && c<0x00007e) {
15827f654740f2a26ad62a5c155af9199af9e69b889claireho      *bufPtr = c;
15927f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
16027f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
16127f654740f2a26ad62a5c155af9199af9e69b889claireho      sprintf(bufPtr,"U+%04X", c);
16227f654740f2a26ad62a5c155af9199af9e69b889claireho      bufPtr+= strlen(bufPtr)-1;
16327f654740f2a26ad62a5c155af9199af9e69b889claireho#else
16427f654740f2a26ad62a5c155af9199af9e69b889claireho      *bufPtr = '%';
16527f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
16627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
16727f654740f2a26ad62a5c155af9199af9e69b889claireho    bufPtr++;
16827f654740f2a26ad62a5c155af9199af9e69b889claireho    c = UTEXT_NEXT32(text);
16927f654740f2a26ad62a5c155af9199af9e69b889claireho  }
17027f654740f2a26ad62a5c155af9199af9e69b889claireho  *bufPtr = 0;
17127f654740f2a26ad62a5c155af9199af9e69b889claireho#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
17227f654740f2a26ad62a5c155af9199af9e69b889claireho  char *ebuf = (char*)malloc(bufLen);
17327f654740f2a26ad62a5c155af9199af9e69b889claireho  uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen);
17427f654740f2a26ad62a5c155af9199af9e69b889claireho  uprv_strncpy(buf, ebuf, bufLen);
17527f654740f2a26ad62a5c155af9199af9e69b889claireho  free((void*)ebuf);
17627f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
17727f654740f2a26ad62a5c155af9199af9e69b889claireho  utext_setNativeIndex(text, oldIndex);
17827f654740f2a26ad62a5c155af9199af9e69b889claireho}
17927f654740f2a26ad62a5c155af9199af9e69b889claireho
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic inline UChar toHex(int32_t i) {
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10)));
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UnicodeString& escape(const UnicodeString& s, UnicodeString& result) {
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (int32_t i=0; i<s.length(); ++i) {
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar c = s[i];
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if ((c <= (UChar)0x7F) && (c>0)) {
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += c;
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += (UChar)0x5c;
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += (UChar)0x75;
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += toHex((c >> 12) & 0xF);
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += toHex((c >>  8) & 0xF);
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += toHex((c >>  4) & 0xF);
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            result += toHex( c        & 0xF);
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return result;
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char ASSERT_BUF[1024];
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic const char* extractToAssertBuf(const UnicodeString& message) {
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(message.length()==0) {
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strcpy(ASSERT_BUF, "[[empty UnicodeString]]");
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  } else {
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString buf;
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    escape(message, buf);
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(buf.length()==0) {
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]");
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1);
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      if(ASSERT_BUF[0]==0) {
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ASSERT_BUF[0]=0;
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for(int32_t i=0;i<buf.length();i++) {
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          UChar ch = buf[i];
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      }
220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return ASSERT_BUF;
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
// Logs a printable rendering of the given UText, tagged with file and line.
#define REGEX_VERBOSE_TEXT(text) { \
    char buf[200]; \
    utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),(text)); \
    logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}

// Reports a data error and returns from the enclosing function if the
// in-scope variable `status` holds a failure code.
#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) { \
    dataerrln("%s:%d: RegexTest failure.  status=%s", \
              __FILE__, __LINE__, u_errorName(status)); \
    return;}}

// Reports an error if `expr` compares equal to FALSE. Does not return.
#define REGEX_ASSERT(expr) {if ((expr)==FALSE) { \
    errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};}

// Evaluates `expr` with a fresh local `status` in scope (so `expr` is expected
// to refer to a variable named `status`) and reports a data error unless that
// status ends up equal to `errcode`.
#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\
if (status!=(errcode)) {dataerrln("RegexTest failure at line %d.  Expected status=%s, got %s", \
    __LINE__, u_errorName((errcode)), u_errorName(status));};}

// Like REGEX_CHECK_STATUS, but also reports a caller-supplied line number and
// does not return from the enclosing function.
#define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \
    "RegexTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); }}

// Reports an error, including a caller-supplied line number, and returns from
// the enclosing function if `expr` compares equal to FALSE.
#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
    errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}

// Reports an error if `ustr` does not compare equal to `inv`. `inv` is also
// printed with %s, so it must be usable as a C string.
#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!((ustr)==(inv))) { \
    errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf((ustr)),(inv));};}
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
24627f654740f2a26ad62a5c155af9199af9e69b889claireho/**
24727f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected expected text in UTF-8 (not platform) codepage
24827f654740f2a26ad62a5c155af9199af9e69b889claireho */
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) {
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText expectedText = UTEXT_INITIALIZER;
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&expectedText, expected, -1, &status);
25327f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(status)) {
25427f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
25527f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
25627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
25727f654740f2a26ad62a5c155af9199af9e69b889claireho    if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) {
25827f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUText:  expected is %d utf-8 bytes, but utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected));
25927f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
26027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_setNativeIndex(actual, 0);
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (utext_compare(&expectedText, -1, actual, -1) != 0) {
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        char buf[201 /*21*/];
26427f654740f2a26ad62a5c155af9199af9e69b889claireho        char expectedBuf[201];
26527f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
26627f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
26727f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&expectedText);
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
27127f654740f2a26ad62a5c155af9199af9e69b889claireho/**
27227f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected invariant (platform local text) input
27327f654740f2a26ad62a5c155af9199af9e69b889claireho */
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) {
27627f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode status = U_ZERO_ERROR;
27727f654740f2a26ad62a5c155af9199af9e69b889claireho    UText expectedText = UTEXT_INITIALIZER;
27827f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status);
27927f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(status)) {
28027f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
28127f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
28227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
28327f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_setNativeIndex(actual, 0);
28427f654740f2a26ad62a5c155af9199af9e69b889claireho    if (utext_compare(&expectedText, -1, actual, -1) != 0) {
28527f654740f2a26ad62a5c155af9199af9e69b889claireho        char buf[201 /*21*/];
28627f654740f2a26ad62a5c155af9199af9e69b889claireho        char expectedBuf[201];
28727f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
28827f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
28927f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
29027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
29127f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_close(&expectedText);
29227f654740f2a26ad62a5c155af9199af9e69b889claireho}
29327f654740f2a26ad62a5c155af9199af9e69b889claireho
/**
 * Asserts that the UText `actual` matches `expected`, where `expected` is
 * UTF-8 text. The call site's file and line are passed to assertUText.
 */
#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) \
    assertUText((expected), (actual), __FILE__, __LINE__)

/**
 * Asserts that the UText `actual` matches `expected`, where `expected` is
 * invariant-character text. The call site's file and line are passed to
 * assertUTextInvariant.
 */
#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) \
    assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This buffer ( inv_buf ) is used to hold the UTF-8 strings
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * passed into utext_openUTF8. An error will be given if
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * INV_BUFSIZ is too small.  It's only used on EBCDIC systems.
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define INV_BUFSIZ 2048 /* increase this if too small */
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t inv_next=0;
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY!=U_ASCII_FAMILY
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char inv_buf[INV_BUFSIZ];
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) {
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(length==-1) length=strlen(inv);
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY==U_ASCII_FAMILY
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  inv_next+=length;
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return utext_openUTF8(ut, inv, length, status);
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#else
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(inv_next+length+1>INV_BUFSIZ) {
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n",
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1));
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    *status = U_MEMORY_ALLOCATION_ERROR;
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return NULL;
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  unsigned char *buf = (unsigned char*)inv_buf+inv_next;
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  uprv_aestrncpy(buf, (const uint8_t*)inv, length);
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  inv_next+=length;
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if 0
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next);
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return utext_openUTF8(ut, (const char*)buf, length, status);
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    REGEX_TESTLM       Macro + invocation function to simplify writing quick tests
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                       for the LookingAt() and  Match() functions.
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       usage:
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          REGEX_TESTLM("pattern",  "input text",  lookingAt expected, matches expected);
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          The expected results are UBool - TRUE or FALSE.
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          The input text is unescaped.  The pattern is not.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Runs the same lookingAt/matches expectation through both doRegexLMTest and
// doRegexLMTestUTF8, passing the invoking line number to each.
#define REGEX_TESTLM(pat, text, looking, match) { \
    doRegexLMTest(pat, text, looking, match, __LINE__); \
    doRegexLMTestUTF8(pat, text, looking, match, __LINE__); \
}
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UnicodeString pattern(pat, -1, US_INV);
361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UnicodeString inputText(text, -1, US_INV);
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status  = U_ZERO_ERROR;
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *REPattern = NULL;
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher        *REMatcher = NULL;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool               retVal     = TRUE;
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString patString(pat, -1, US_INV);
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REPattern = RegexPattern::compile(patString, 0, pe, status);
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
3716d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("RegexTest failure in RegexPattern::compile() at line %d.  Status = %s",
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (line==376) { RegexPatternDump(REPattern);}
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString inputString(inputText);
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString unEscapedInput = inputString.unescape();
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REMatcher = REPattern->matcher(unEscapedInput, status);
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in REPattern::matcher() at line %d.  Status = %s\n",
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool actualmatch;
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    actualmatch = REMatcher->lookingAt(status);
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in lookingAt() at line %d.  Status = %s\n",
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal =  FALSE;
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (actualmatch != looking) {
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    actualmatch = REMatcher->matches(status);
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in matches() at line %d.  Status = %s\n",
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (actualmatch != match) {
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest: wrong return from matches() at line %d.\n", line);
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (retVal == FALSE) {
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPatternDump(REPattern);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete REPattern;
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete REMatcher;
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retVal;
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               pattern    = UTEXT_INITIALIZER;
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             inputUTF8Length;
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char                *textChars = NULL;
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               inputText  = UTEXT_INITIALIZER;
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status     = U_ZERO_ERROR;
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *REPattern = NULL;
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *REMatcher = NULL;
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               retVal     = TRUE;
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43127f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(&pattern, 0, pe, status);
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8).  Status = %s\n",
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString inputString(text, -1, US_INV);
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString unEscapedInput = inputString.unescape();
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // UTF-8 does not allow unpaired surrogates, so this could actually happen
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        logln("RegexTest unable to convert input to UTF8 at line %d.  Status = %s\n", line, u_errorName(status));
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE; // not a failure of the Regex engine
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR; // buffer overflow
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    textChars = new char[inputUTF8Length+1];
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REMatcher = &REPattern->matcher(status)->reset(&inputText);
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in REPattern::matcher() at line %d (UTF8).  Status = %s\n",
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool actualmatch;
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    actualmatch = REMatcher->lookingAt(status);
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in lookingAt() at line %d (UTF8).  Status = %s\n",
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal =  FALSE;
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (actualmatch != looking) {
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line);
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    actualmatch = REMatcher->matches(status);
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in matches() at line %d (UTF8).  Status = %s\n",
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (actualmatch != match) {
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line);
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (retVal == FALSE) {
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPatternDump(REPattern);
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&pattern);
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] textChars;
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retVal;
49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
//    REGEX_ERR       Macro + invocation function to simplify writing
//                       regex tests for incorrect patterns
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       usage:
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          REGEX_ERR("pattern",   expected error line, column, expected status);
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__);
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          UErrorCode expectedStatus, int32_t line) {
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       pattern(pat);
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status         = U_ZERO_ERROR;
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *callerPattern = NULL;
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Compile the caller's pattern
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString patString(pat);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    callerPattern = RegexPattern::compile(patString, 0, pe, status);
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status != expectedStatus) {
5256d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (status != U_ZERO_ERROR) {
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (pe.line != errLine || pe.offset != errCol) {
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    line, errLine, errCol, pe.line, pe.offset);
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete callerPattern;
53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Compile again, using a UTF-8-based UText
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText patternText = UTEXT_INITIALIZER;
54127f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&patternText, pat, -1, &status);
54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callerPattern = RegexPattern::compile(&patternText, 0, pe, status);
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status != expectedStatus) {
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status != U_ZERO_ERROR) {
54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (pe.line != errLine || pe.offset != errCol) {
54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    line, errLine, errCol, pe.line, pe.offset);
55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete callerPattern;
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      Basic      Check for basic functionality of regex pattern matching.
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                 Avoid the use of REGEX_FIND test macro, which has
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                 substantial dependencies on basic Regex functionality.
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
void RegexTest::Basic() {
    // Each REGEX_TESTLM(pattern, input, looking, match) below runs the pattern
    // against the input and checks the lookingAt() and matches() results against
    // the two expected UBool values.  The input text is unescaped; the pattern is not.


//
// Debug - slide failing test cases early
//      (disabled; when enabled it runs only the cases in this block and then exits)
//
#if 0
    {
        // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);
        UParseError pe;
        UErrorCode  status = U_ZERO_ERROR;
        RegexPattern::compile("^(?:a?b?)*$", 0, pe, status);
        // REGEX_FIND("(?>(abc{2,4}?))(c*)", "<0>ab<1>cc</1><2>ccc</2></0>ddd");
        // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX====================");
    }
    exit(1);
#endif


    //
    // Pattern with parentheses
    //
    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE,  FALSE);
    REGEX_TESTLM("st(abc)ring", "stabcring",       TRUE,  TRUE);
    REGEX_TESTLM("st(abc)ring", "stabcrung",       FALSE, FALSE);

    //
    // Patterns with *
    //
    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);

    REGEX_TESTLM("a*", "",  TRUE, TRUE);
    REGEX_TESTLM("a*", "b", TRUE, FALSE);


    //
    //  Patterns with "."
    //
    REGEX_TESTLM(".", "abc", TRUE, FALSE);
    REGEX_TESTLM("...", "abc", TRUE, TRUE);
    REGEX_TESTLM("....", "abc", FALSE, FALSE);
    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);

    //
    //  Patterns with * applied to chars at end of literal string
    //
    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);

    //
    //  Supplemental chars match as single chars, not a pair of surrogates.
    //
    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);


    //
    //  UnicodeSets in the pattern
    //
    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
    // NOTE(review): the next two cases are identical; one of them may have been
    //               intended to use a different input.
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);

    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurrences.
    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);

    //
    //   OR operator in patterns
    //
    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
    REGEX_TESTLM("a|b", "b", TRUE, TRUE);

    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);

    //
    //  + quantifier (one or more)
    //
    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
    REGEX_TESTLM("b+", "", FALSE, FALSE);
    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);

    //
    //   ? quantifier (zero or one)
    //
    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);

    //
    //  Escape sequences that become single literal chars, handled internally
    //   by ICU's Unescape.
    //

    // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE);      // Octal     TODO: not implemented yet.
    REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE);        // BEL
    REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE);       // Control-L
    REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE);        // Escape
    REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE);        // Form Feed
    REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE);        // new line
    REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE);        //  CR
    REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE);        // Tab
    REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE);
    REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE);

    REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE);  //  \A matches only at the beginning of input
    REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE);  //  \A matches only at the beginning of input

    // Escape of special chars in patterns
    REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE);
}
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    UTextBasic   Check for quirks that are specific to the UText
71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                 implementation.
71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::UTextBasic() {
    // Exercises RegexMatcher with UText-backed pattern and input: builds a
    // matcher from a UTF-8 UText, resets it onto a second UText, and then
    // resets it onto its own inputText(), checking after each reset that the
    // matcher's input still reads back as str_abc.
    //
    // The text is spelled as explicit byte values rather than a string
    // literal; presumably so the bytes are the UTF-8 encoding of "abc"
    // regardless of the compiler's execution charset (see the EBCDIC note at
    // the top of this file).
    //
    // NOTE(review): REGEX_CHECK_STATUS is defined outside this part of the
    // file. If it returns from the function on failure, the utext_close()
    // calls at the bottom are skipped on that path -- confirm whether that
    // matters for this test.
71727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
    // Wrap the bytes in a stack-allocated UText (length -1: NUL-terminated)
    // and construct the matcher directly from it, with flags 0.
71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText pattern = UTEXT_INITIALIZER;
72027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&pattern, str_abc, -1, &status);
72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher matcher(&pattern, 0, status);
    // A single status check covers both the UText open and the matcher
    // construction above, since they share the same status variable.
72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Open a second, independent UText over the same bytes to use as input,
    // point the matcher at it, and verify inputText() reads back as str_abc.
72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText input = UTEXT_INITIALIZER;
72527f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&input, str_abc, -1, &status);
72650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher.reset(&input);
72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
72927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Reset the matcher using the UText it already reports as its input,
    // then check that inputText() still reads back as str_abc afterwards.
73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher.reset(matcher.inputText());
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
73327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Close the two UTexts opened by this function.
73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&pattern);
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&input);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Match   Test that the API for class RegexMatcher
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  is present and nominally working, but excluding functions
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  implementing replace operations.
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Match() {
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status=U_ZERO_ERROR;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             flags = 0;
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Debug - slide failing test cases early
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return;
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Simple pattern compilation
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("abc");
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern        *pat2;
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat2 = RegexPattern::compile(re, flags, pe, status);
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString inStr1 = "abcdef this is a test";
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString instr2 = "not abc";
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString empty  = "";
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Matcher creation and reset.
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *m1 = pat2->matcher(inStr1, status);
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == instr2);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(inStr1);
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(empty);
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == empty);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(&m1->pattern() == pat2);
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  reset(pos, status)
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(inStr1);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(4, status);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(-1, status);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(0, status);
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t len = m1->input().length();
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(len-1, status);
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(len, status);
81627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
81727f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
81827f654740f2a26ad62a5c155af9199af9e69b889claireho
81927f654740f2a26ad62a5c155af9199af9e69b889claireho        m1->reset(len+1, status);
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // matches(pos, status)
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(4, status) == TRUE);
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset();
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(3, status) == FALSE);
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset();
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(5, status) == FALSE);
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(4, status) == TRUE);
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Match() at end of string should fail, but should not
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  be an error.
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = m1->input().length();
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(len, status) == FALSE);
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Match beyond end of string should fail with an error.
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(len+1, status) == FALSE);
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Successful match at end of string.
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            RegexMatcher m("A?", 0, status);  // will match zero length string.
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            m.reset(inStr1);
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len = inStr1.length();
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.matches(len, status) == TRUE);
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            m.reset(empty);
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.matches(0, status) == TRUE);
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // lookingAt(pos, status)
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);  // "not abc"
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = m1->input().length();
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(len, status) == FALSE);
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE);
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete m1;
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat2;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Capture Group.
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::start();
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::end();
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::groupCount();
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("01(23(45)67)(.*)");
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = "0123456789";
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const int32_t matchStarts[] = {0,  2, 4, 8};
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const int32_t matchEnds[]   = {10, 8, 6, 10};
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<4; i++) {
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t actualStart = matcher->start(i, status);
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (actualStart != matchStarts[i]) {
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("RegexTest failure at line %d, index %d.  Expected %d, got %d\n",
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    __LINE__, i, matchStarts[i], actualStart);
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t actualEnd = matcher->end(i, status);
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (actualEnd != matchEnds[i]) {
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("RegexTest failure at line %d index %d.  Expected %d, got %d\n",
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    __LINE__, i, matchEnds[i], actualEnd);
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->lookingAt(status);
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(status)    == "0123456789");
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(0, status) == "0123456789");
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(1, status) == "234567"    );
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(2, status) == "45"        );
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(3, status) == "89"        );
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  find
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("abc");
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = ".abc..abc...abc..";
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                    012345678901234567
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 6);
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 12);
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find() == FALSE);
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find() == FALSE);
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(0, status));
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(1, status));
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(2, status));
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 6);
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(12, status));
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 12);
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(13, status) == FALSE);
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(16, status) == FALSE);
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(17, status) == FALSE);
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->groupCount() == 0);
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  find, with \G in pattern (true if at the end of a previous match).
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString       re(".*?(?:(\\Gabc)|(abc))", -1, US_INV);
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = ".abcabc.abc..";
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                    012345678901234567
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 0);
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(1, status) == -1);
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(2, status) == 1);
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 4);
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(1, status) == 4);
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(2, status) == -1);
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   find with zero length matches, match position should bump ahead
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     to prevent loops.
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t                 i;
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher        m("(?= ?)", 0, status);   // This pattern will zero-length matches anywhere,
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      //   using an always-true look-ahead.
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s("    ");
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i++) {
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == i);
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==5);
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check that the bump goes over surrogate pairs OK
1055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004");
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s = s.unescape();
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i+=2) {
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == i);
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==10);
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // find() loop breaking test.
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //        with pattern of /.?/, should see a series of one char matches, then a single
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //        match of zero length at the end of the input string.
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t                 i;
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher        m(".?", 0, status);
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s("    ");
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i++) {
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==5);
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Matchers with no input string behave as if they had an empty input string.
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  m(".?", 0, status);
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.find());
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.start(status) == 0);
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.input() == "");
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern  *p = RegexPattern::compile(".", 0, status);
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  *m = p->matcher(status);
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m->find() == FALSE);
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m->input() == "");
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete m;
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete p;
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Regions
1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("This is test data");
1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m(".*", testString,  0, status);
1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == testString.length());
1122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.region(2,4, status);
1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.matches(status));
1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.start(status)==2);
1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.end(status)==4);
1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.reset();
1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == testString.length());
1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString shorterString("short");
1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.reset(shorterString);
1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == shorterString.length());
1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
1148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // hitEnd() and requireEnd()
1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aabb");
1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m1(".*", testString,  0, status);
1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.hitEnd() == TRUE);
1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.requireEnd() == FALSE);
1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m2("a*", testString, 0, status);
1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.hitEnd() == FALSE);
1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.requireEnd() == FALSE);
1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m3(".*$", testString, 0, status);
1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.hitEnd() == TRUE);
1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.requireEnd() == TRUE);
1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compilation error on reset with UChar *
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   These were a hazard that people were stumbling over with runtime errors.
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Changed them to compiler errors by adding private methods that more closely
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   matched the incorrect use of the functions.
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar ucharString[20];
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m(".", 0, status);
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(ucharString);  // should not compile.
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *p = RegexPattern::compile(".", 0, status);
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *m2 = p->matcher(ucharString, status);    //  should not compile.
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m3(".", ucharString, 0, status);  //  Should not compile
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //  Time Outs.
1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //       Note:  These tests will need to be changed when the regexp engine is
1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //              able to detect and cut short the exponential time behavior on
1217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //              this type of match.
1218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //    Enough 'a's in the string to cause the match to time out.
1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //       (Each additional 'a' doubles the time)
1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");
1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(a+)+b", testString, 0, status);
1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getTimeLimit() == 0);
1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setTimeLimit(100, status);
1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getTimeLimit() == 100);
1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_TIME_OUT);
1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   Few enough 'a's to slip in under the time limit.
1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aaaaaaaaaaaaaaaaaa");
1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(a+)+b", testString, 0, status);
1237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setTimeLimit(100, status);
1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //  Stack Limits
1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString testString(1000000, 0x41, 1000000);  // Length 1,000,000, filled with 'A'
1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   of the '+', and makes the stack frames larger.
1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(A)+A$", testString, 0, status);
1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With the default stack, this match should fail to run
1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With unlimited stack, it should run
1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(0, status);
1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == TRUE);
1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 0);
1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With a limited stack, the match should fail
1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(10000, status);
1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 10000);
1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A pattern that doesn't save state should work with
1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   a minimal sized stack
1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString = "abc";
1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("abc", testString, 0, status);
1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(30, status);
1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status) == TRUE);
1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 30);
1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Negative stack sizes should fail
1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(1000, status);
1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(-1, status);
1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 1000);
1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Replace        API test for class RegexMatcher, testing the
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                         Replace family of functions.
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Replace() {
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Replace
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             flags=0;
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status=U_ZERO_ERROR;
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       re("abc");
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString data = ".abc..abc...abc..";
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                    012345678901234567
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher = pat->matcher(data, status);
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Plain vanilla matches.
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString  dest;
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".yz..abc...abc..");
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".yz..yz...yz..");
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Plain vanilla non-matches.
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d2 = ".abx..abx...abx..";
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d2);
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".abx..abx...abx..");
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".abx..abx...abx..");
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty source string
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d3 = "";
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d3);
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "");
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "");
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty substitution string
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(data);              // ".abc..abc...abc.."
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("", status);
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "...abc...abc..");
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("", status);
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "........");
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // match whole string
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d4 = "abc";
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d4);
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("xyz", status);
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "xyz");
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("xyz", status);
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "xyz");
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Capture Group, simple case
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       re2("a(..)");
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d5 = "abcdefg";
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher2 = pat2->matcher(d5, status);
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst("$1$1", status);
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "bcbcdefg");
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status);
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "The value of $1 is bc.defg");
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    replacement = replacement.unescape();
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst(replacement, status);
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg");
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR);
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Replacement String with \u hex escapes
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  src = "abc 1 abc 2 abc 3";
1423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\u0043--");
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset(src);
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  result = matcher->replaceAll(substitute, status);
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3");
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  src = "abc !";
1431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\U00010000--");
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset(src);
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  result = matcher->replaceAll(substitute, status);
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString expected = UnicodeString("--");
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expected.append((UChar32)0x10000);
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expected.append("-- !");
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == expected);
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO:  need more thorough testing of capture substitutions.
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Bug 4057
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin";
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m("ss(.*?)ee", 0, status);
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString result;
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Multiple finds do NOT bump up the previous appendReplacement position.
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // After a reset into the interior of a string, appendReplacement still starts at beginning.
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.truncate(0);
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(10, status);
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // find() at interior of string, appendReplacement still starts at beginning.
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.truncate(0);
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset();
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find(10, status);
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendTail(result);
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin");
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete matcher2;
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat2;
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete matcher;
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat;
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Pattern       Test that the API for class RegexPattern is
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                        present and nominally working.
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Pattern() {
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        pata;    // Test default constructor to not crash.
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        patb;
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pata == patb);
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pata == pata);
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString re1("abc[a-l][m-z]");
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString re2("def");
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode    status = U_ZERO_ERROR;
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError   pe;
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat1 = RegexPattern::compile(re1, 0, pe, status);
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat2 = RegexPattern::compile(re2, 0, pe, status);
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1 == *pat1);
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1 != pata);
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assign
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    patb = *pat1;
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == *pat1);
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy Construct
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern patc(*pat1);
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patc == *pat1);
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == patc);
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1 != pat2);
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    patb = *pat2;
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb != patc);
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == *pat2);
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compile with no flags.
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern         *pat1a = RegexPattern::compile(re1, pe, status);
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1a == *pat1);
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1a->flags() == 0);
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compile with different flags should be not equal
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1b != *pat1a);
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1a->flags() == 0);
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1b;
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // clone
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat1c = pat1->clone();
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1c == *pat1);
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1c != *pat2);
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1c;
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1a;
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat2;
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Verify that a matcher created from a cloned pattern works.
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     (Jitterbug 3423)
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode     status     = U_ZERO_ERROR;
1560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexPattern  *pSource    = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern  *pClone     = pSource->clone();
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete         pSource;
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  *mFromClone = pClone->matcher(status);
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = "Hello World";
1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mFromClone->reset(s);
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == TRUE);
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->group(status) == "Hello");
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == TRUE);
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->group(status) == "World");
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == FALSE);
1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete mFromClone;
1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pClone;
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   matches convenience API
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);
1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_INDEX_OUTOFBOUNDS_ERROR;
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Split()
1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile(" +",  pe, status);
1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString  fields[10];
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n;
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 10, status);
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==4);
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now");
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="is");
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="the");
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="time");
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="");
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 2, status);
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==2);
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now");
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="is the time");
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[1] = "*";
1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 1, status);
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==1);
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now is the time");
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="*");
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("    Now       is the time   ", fields, 10, status);
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1630b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==6);
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="Now");
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="is");
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="the");
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="time");
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="");
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("     ", fields, 10, status);
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==2);
1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1642b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[0] = "foo";
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("", fields, 10, status);
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==0);
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="foo");
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  split, with a pattern with (capture)
1653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"),  pe, status);
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1659b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="c");
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="");
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(status==U_ZERO_ERROR);
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="c");
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="");
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[6] = "foo";
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 6, status);
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==6);
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1690b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[5]=="");  // All text following "<c>" field delimiter.
1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="foo");
1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[5] = "foo";
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time<c>");
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="foo");
1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[5] = "foo";
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="foo");
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==4);
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="the time<c>");
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("([-,])",  pe, status);
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("1-10,20", fields, 10, status);
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="1");
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="-");
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="10");
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]==",");
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="20");
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Test split of string with empty trailing fields
1741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    pat1 = RegexPattern::compile(",", pe, status);
1742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1743b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("a,b,c,", fields, 10, status);
1744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==4);
1746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="a");
1747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="b");
1748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="c");
1749b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="");
1750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("a,,,", fields, 10, status);
1752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==4);
1754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="a");
1755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
1756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="");
1757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="");
1758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pat1;
1759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Split Separator with zero length match.
1761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    pat1 = RegexPattern::compile(":?", pe, status);
1762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1763b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("abc", fields, 10, status);
1764b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1765b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==5);
1766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="");
1767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="a");
1768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="b");
1769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="c");
1770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[4]=="");
1771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pat1;
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // RegexPattern::pattern()
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = new RegexPattern();
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->pattern() == "");
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->pattern() == "(Hello, world)*");
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // classID functions
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID());
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() != NULL);
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString Hello("Hello, world.");
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *m = pat1->matcher(Hello, status);
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID());
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID());
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(m->getDynamicClassID() != NULL);
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete m;
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
180650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Match_UTF8   Test that the alternate engine for class RegexMatcher
180750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       is present and working, but excluding functions
180850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       implementing replace operations.
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
181150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Match_UTF8() {
181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status=U_ZERO_ERROR;
181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             flags = 0;
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Debug - slide failing test cases early
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0
182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
182250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return;
182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Simple pattern compilation
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re = UTEXT_INITIALIZER;
183027f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
1831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        REGEX_VERBOSE_TEXT(&re);
183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern        *pat2;
183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pat2 = RegexPattern::compile(&re, flags, pe, status);
183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
183650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input1 = UTEXT_INITIALIZER;
183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input2 = UTEXT_INITIALIZER;
183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty  = UTEXT_INITIALIZER;
183927f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status);
184027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&input1);
184127f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
184227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&input2);
184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUChars(&empty, NULL, 0, &status);
184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184527f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
184650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t input2Len = strlen("not abc");
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Matcher creation and reset.
185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
1852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);
185350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
185450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
185527f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */
185627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);
185850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
185927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */
186027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText());
186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input1);
186227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&empty);
186550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
186650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_nativeLength(&empty) == 0);
1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
186850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
186950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  reset(pos, status)
187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input1);
187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(4, status);
187350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
187427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(-1, status);
187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(0, status);
188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(input1Len-1, status);
188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(input1Len, status);
189027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
189127f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
189227f654740f2a26ad62a5c155af9199af9e69b889claireho
189327f654740f2a26ad62a5c155af9199af9e69b889claireho        m1->reset(input1Len+1, status);
189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // match(pos, status)
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);
190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(4, status) == TRUE);
190250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset();
190350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(3, status) == FALSE);
190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset();
190550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(5, status) == FALSE);
190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(4, status) == TRUE);
190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match() at end of string should fail, but should not
191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  be an error.
191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(input2Len, status) == FALSE);
191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match beyond end of string should fail with an error.
191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE);
191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Successful match at end of string.
192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            RegexMatcher m("A?", 0, status);  // will match zero length string.
192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            m.reset(&input1);
192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.matches(input1Len, status) == TRUE);
192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            m.reset(&empty);
193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.matches(0, status) == TRUE);
193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // lookingAt(pos, status)
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);  // "not abc"
194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);
194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);
195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete m1;
195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat2;
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input1);
195750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input2);
195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Capture Group.
196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::start();
196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::end();
196650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::groupCount();
196750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
197327f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
197427f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_01234567_pat, -1, &status);
197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
197950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
198027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
198127f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_0123456789, -1, &status);
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1983b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
198650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        static const int32_t matchStarts[] = {0,  2, 4, 8};
198750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        static const int32_t matchEnds[]   = {10, 8, 6, 10};
198850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t i;
198950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; i<4; i++) {
199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t actualStart = matcher->start(i, status);
199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (actualStart != matchStarts[i]) {
199327f654740f2a26ad62a5c155af9199af9e69b889claireho                errln("RegexTest failure at %s:%d, index %d.  Expected %d, got %d\n",
199427f654740f2a26ad62a5c155af9199af9e69b889claireho                      __FILE__, __LINE__, i, matchStarts[i], actualStart);
199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t actualEnd = matcher->end(i, status);
199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (actualEnd != matchEnds[i]) {
199927f654740f2a26ad62a5c155af9199af9e69b889claireho                errln("RegexTest failure at %s:%d index %d.  Expected %d, got %d\n",
200027f654740f2a26ad62a5c155af9199af9e69b889claireho                      __FILE__, __LINE__, i, matchEnds[i], actualEnd);
200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
200550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
2006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
200950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
201250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->lookingAt(status);
201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString dest;
201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText destText = UTEXT_INITIALIZER;
201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&destText, &dest, &status);
201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText *result;
201827f654740f2a26ad62a5c155af9199af9e69b889claireho        //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
201927f654740f2a26ad62a5c155af9199af9e69b889claireho        //	Test shallow-clone API
202027f654740f2a26ad62a5c155af9199af9e69b889claireho        int64_t   group_len;
202127f654740f2a26ad62a5c155af9199af9e69b889claireho        result = matcher->group((UText *)NULL, group_len, status);
202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
202327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
202527f654740f2a26ad62a5c155af9199af9e69b889claireho        result = matcher->group(0, &destText, group_len, status);
202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
202750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
202827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
202927f654740f2a26ad62a5c155af9199af9e69b889claireho        //  destText is now immutable, reopen it
203027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_close(&destText);
203127f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUnicodeString(&destText, &dest, &status);
203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(0, NULL, status);
203450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
203527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
203650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
203750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(0, &destText, status);
203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
204027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
204150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(1, NULL, status);
204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
204427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
204527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
204650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(1, &destText, status);
204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
205027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
205150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(2, NULL, status);
205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
205427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
205527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
205650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(2, &destText, status);
205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
206027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
206150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(3, NULL, status);
206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
206427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
206527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(3, &destText, status);
206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
207027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
2071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
2076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&destText);
208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  find
2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
209327f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
209427f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_abc, -1, &status);
2095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
209927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
210027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abcabcabc, -1, &status);
210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                      012345678901234567
2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
210850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 6);
210950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 12);
211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find() == FALSE);
211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find() == FALSE);
2113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
2117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(0, status));
211950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(1, status));
212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(2, status));
212350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 6);
212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(12, status));
212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 12);
212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(13, status) == FALSE);
212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(16, status) == FALSE);
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(17, status) == FALSE);
212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
2130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
2135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
213650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->groupCount() == 0);
213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
2143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  find, with \G in pattern (true if at the end of a previous match).
2148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
214950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
215250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
215350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
215427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */
215527f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_Gabcabc, -1, &status);
2156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
216127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
216227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abcabcabc, -1, &status);
216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                      012345678901234567
2164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 0);
216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(1, status) == -1);
217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(2, status) == 1);
2171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 4);
217450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(1, status) == 4);
217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(2, status) == -1);
217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
217750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
2183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   find with zero length matches, match position should bump ahead
218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     to prevent loops.
2188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
219050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t                 i;
219150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher        m("(?= ?)", 0, status);   // This pattern will produce zero-length matches anywhere,
219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                      //   using an always-true look-ahead.
219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText s = UTEXT_INITIALIZER;
219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, "    ", -1, &status);
219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; ; i++) {
219950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == i);
220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(i==5);
220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
220750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check that the bump goes over characters outside the BMP OK
220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in UTF-8
220950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00};
221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, (char *)aboveBMP, -1, &status);
221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
221227f654740f2a26ad62a5c155af9199af9e69b889claireho        for (i=0; ; i+=4) {
221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == i);
2218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
221927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(i==20);
222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&s);
222250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
222350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
222450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // find() loop breaking test.
222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //        with pattern of /.?/, should see a series of one char matches, then a single
222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //        match of zero length at the end of the input string.
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t                 i;
222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher        m(".?", 0, status);
223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText s = UTEXT_INITIALIZER;
223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, "    ", -1, &status);
223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; ; i++) {
223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(i==5);
224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&s);
2244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Matchers with no input string behave as if they had an empty input string.
2249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
2250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
225250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  m(".?", 0, status);
225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.find());
225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.start(status) == 0);
225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.input() == "");
2258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *p = RegexPattern::compile(".", 0, status);
226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  *m = p->matcher(status);
226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
2264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m->find() == FALSE);
226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);
226750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete m;
226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete p;
2269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Regions
227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testPattern = UTEXT_INITIALIZER;
227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testText    = UTEXT_INITIALIZER;
227827f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);
227927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testPattern);
228027f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
228127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testText);
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m(&testPattern, &testText, 0, status);
228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.region(2,4, status);
229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.matches(status));
229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.start(status)==2);
229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.end(status)==4);
229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset();
229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230127f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
230227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testText);
230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&testText);
230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
230850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
2323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
232950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testText);
233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testPattern);
2331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // hitEnd() and requireEnd()
233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testPattern = UTEXT_INITIALIZER;
233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testText    = UTEXT_INITIALIZER;
234027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */
234127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
234227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_, -1, &status);
234327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testText, str_aabb, -1, &status);
234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m1(&testPattern, &testText,  0, status);
234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.hitEnd() == TRUE);
234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.requireEnd() == FALSE);
234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
235227f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
235327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_a, -1, &status);
235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m2(&testPattern, &testText, 0, status);
235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.hitEnd() == FALSE);
235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.requireEnd() == FALSE);
235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
2359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
236127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */
236227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);
236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m3(&testPattern, &testText, 0, status);
236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.hitEnd() == TRUE);
236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.requireEnd() == TRUE);
236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testText);
237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testPattern);
2371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
2373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------
2376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Replace_UTF8   API test for class RegexMatcher, testing the
237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                         Replace family of functions.
2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
238150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Replace_UTF8() {
238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Replace
238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             flags=0;
238650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status=U_ZERO_ERROR;
2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               re=UTEXT_INITIALIZER;
239027f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
239127f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&re);
239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
239527f654740f2a26ad62a5c155af9199af9e69b889claireho    char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //             012345678901234567
239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText dataText = UTEXT_INITIALIZER;
239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, data, -1, &status);
239927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
240027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&dataText);
2401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);
2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Plain vanilla matches.
240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString  dest;
240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText destText = UTEXT_INITIALIZER;
240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&destText, &dest, &status);
240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText *result;
241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replText = UTEXT_INITIALIZER;
241250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
241427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_yz, -1, &status);
241527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&replText);
241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
241827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */
241927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
242427f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
242827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */
242927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
243627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Plain vanilla non-matches.
244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
244127f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
244227f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
244727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
244850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
244950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
245050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
245150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
245227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
245350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
245450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
245550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
245627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
245850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
245950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
246050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
246150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
246227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
246550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Empty source string
246650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
246750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, NULL, 0, &status);
246850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
246950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
247227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
247727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
247850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
248127f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
248627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Empty substitution string
249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&replText, NULL, 0, &status);
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
249727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */
249827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
250327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
250727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */
250827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
251427f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // match whole string
251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
251927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
252027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abc, -1, &status);
252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
252327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */
252427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_xyz, -1, &status);
252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
252727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
253327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
253727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
254327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Capture Group, simple case
254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
254827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */
254927f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re, str_add, -1, &status);
255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);
255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
255227f654740f2a26ad62a5c155af9199af9e69b889claireho
255327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */
255427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abcdefg, -1, &status);
2555b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
255827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
255927f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_11, -1, &status);
256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
256227f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* bcbcdefg */
256327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
256450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
256927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
2570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
2571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
2572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    utext_openUTF8(&replText, str_v, -1, &status);
2573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_VERBOSE_TEXT(&replText);
257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
257627f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */
257727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
258327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
258527f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
258627f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
258927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */
259027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
259627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
259827f654740f2a26ad62a5c155af9199af9e69b889claireho    unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */
259927f654740f2a26ad62a5c155af9199af9e69b889claireho    //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE
260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //                                 012345678901234567890123456
260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[22] = 0xF0;
260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[23] = 0x9D;
260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[24] = 0x9F;
260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[25] = 0x8F;
260550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
260927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
261027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
261627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
261727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e,  0x00 }; /* bad capture group number $5..." */
261827f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);
261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR);
262027f654740f2a26ad62a5c155af9199af9e69b889claireho//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR);
262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
262527f654740f2a26ad62a5c155af9199af9e69b889claireho//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Replacement String with \u hex escapes
262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
263127f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */
263227f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */
263327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
263427f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_u0043, -1, &status);
263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset(&dataText);
263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, NULL, status);
263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
263927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
264027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, &destText, status);
264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
264627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
264927f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */
265027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_abc, -1, &status);
265127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */
265227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_U00010000, -1, &status);
265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset(&dataText);
265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
265527f654740f2a26ad62a5c155af9199af9e69b889claireho        unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                          0123456789
265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[2] = 0xF0;
265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[3] = 0x90;
265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[4] = 0x80;
266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[5] = 0x80;
266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, NULL, status);
266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
266427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, &destText, status);
266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
267027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // TODO:  need more thorough testing of capture substitutions.
2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Bug 4057
267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
267650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
267750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
267827f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */
267927f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */
268027f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
268127f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_ssee, -1, &status);
268227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_blah, -1, &status);
268327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_ooh, -1, &status);
268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m(&re, 0, status);
268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString result;
268950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText resultText = UTEXT_INITIALIZER;
269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Multiple finds do NOT bump up the previous appendReplacement position.
269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&dataText);
269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
269827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
269927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText);
2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // After a reset into the interior of a string, appendReplacement still starts at beginning.
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.truncate(0);
270450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(10, status);
270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
271027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
271127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText);
2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // find() at interior of string, appendReplacement still starts at beginning.
271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.truncate(0);
271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset();
271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find(10, status);
271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
272227f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
272327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);
2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272527f654740f2a26ad62a5c155af9199af9e69b889claireho        m.appendTail(&resultText, status);
272627f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
272727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
272850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
272950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&resultText);
273050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
273250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher2;
273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat2;
273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher;
273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat;
273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&dataText);
273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replText);
273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&destText);
274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re);
2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Pattern_UTF8  Test that the API for class RegexPattern is
274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                        present and nominally working.
274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
275050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Pattern_UTF8() {
275150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        pata;    // Test default constructor to not crash.
275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        patb;
275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pata == patb);
275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pata == pata);
275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText         re1 = UTEXT_INITIALIZER;
275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText         re2 = UTEXT_INITIALIZER;
275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode    status = U_ZERO_ERROR;
276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError   pe;
276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276227f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
276327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
276427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re1, str_abcalmz, -1, &status);
276527f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re2, str_def, -1, &status);
276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat1 = RegexPattern::compile(&re1, 0, pe, status);
276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat2 = RegexPattern::compile(&re2, 0, pe, status);
276950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1 == *pat1);
277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1 != pata);
277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Assign
277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patb = *pat1;
277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == *pat1);
277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Copy Construct
277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern patc(*pat1);
277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patc == *pat1);
278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == patc);
278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1 != pat2);
278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patb = *pat2;
278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb != patc);
278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == *pat2);
278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compile with no flags.
278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern         *pat1a = RegexPattern::compile(&re1, pe, status);
278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1a == *pat1);
278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1a->flags() == 0);
279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compile with different flags should be not equal
279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status);
279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1b != *pat1a);
279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1a->flags() == 0);
279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1b;
280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // clone
280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat1c = pat1->clone();
280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1c == *pat1);
280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1c != *pat2);
280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1c;
280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1a;
280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat2;
281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re1);
281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re2);
281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Verify that a matcher created from a cloned pattern works.
281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     (Jitterbug 3423)
281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode     status     = U_ZERO_ERROR;
282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText          pattern    = UTEXT_INITIALIZER;
282227f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
282327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_pL, -1, &status);
282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *pSource    = RegexPattern::compile(&pattern, 0, status);
282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *pClone     = pSource->clone();
282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete         pSource;
282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  *mFromClone = pClone->matcher(status);
282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText          input      = UTEXT_INITIALIZER;
283227f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
283327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_HelloWorld, -1, &status);
283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        mFromClone->reset(&input);
283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == TRUE);
283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->group(status) == "Hello");
283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == TRUE);
283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->group(status) == "World");
283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == FALSE);
284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete mFromClone;
284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pClone;
284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&pattern);
284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matches convenience API
284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status  = U_ZERO_ERROR;
285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText      pattern = UTEXT_INITIALIZER;
285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText      input   = UTEXT_INITIALIZER;
285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
285527f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
285627f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_randominput, -1, &status);
285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
285827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */
285927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_dotstar, -1, &status);
286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
286250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
286327f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
286427f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_abc, -1, &status);
286550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
286827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
286927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_nput, -1, &status);
287050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
287327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_randominput, -1, &status);
287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
287550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
287727f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */
287827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_u, -1, &status);
287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abc, -1, &status);
288327f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_abc, -1, &status);
288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&pattern);
289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Split()
289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
289727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /*  + */
289827f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re1, str_spaceplus, -1, &status);
289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString  fields[10];
290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t n;
290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 10, status);
290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==4);
290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now");
290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="is");
290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="the");
291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="time");
291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="");
291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 2, status);
291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==2);
291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now");
291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="is the time");
291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[1] = "*";
292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 1, status);
292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==1);
292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now is the time");
292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="*");
292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("    Now       is the time   ", fields, 10, status);
293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2931b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==6);
293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="Now");
293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="is");
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="the");
293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="time");
293750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="");
2938b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[6]=="");
293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2940b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[2] = "*";
294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("     ", fields, 10, status);
294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2943b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==2);
294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
2945b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
2946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="*");
294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[0] = "foo";
294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("", fields, 10, status);
295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==0);
295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="foo");
295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  split, with a pattern with (capture)
295727f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status);
295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1,  pe, status);
295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
2962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[6] = fields[7] = "*";
296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="c");
297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="");
2973b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[7]=="*");
297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(status==U_ZERO_ERROR);
297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[6] = fields[7] = "*";
297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="c");
298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="");
2987b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[7]=="*");
298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[6] = "foo";
2991b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("  <a>Now is <b>the time<c> ", fields, 6, status);
299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==6);
299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
2999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[5]==" ");
300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="foo");
300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[5] = "foo";
300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time<c>");
301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="foo");
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[5] = "foo";
301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="foo");
302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==4);
303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="the time<c>");
303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303727f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status);
303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("1-10,20", fields, 10, status);
304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
304350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="1");
304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="-");
304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="10");
304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]==",");
304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="20");
304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // RegexPattern::pattern() and patternText()
305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = new RegexPattern();
305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1->pattern() == "");
305627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));
305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
3058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const char *helloWorldInvariant = "(Hello, world)*";
3059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
3062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
306327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re1);
306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      Extended       A more thorough check for features of regex patterns
307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     The test cases are in a separate data file,
307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       source/tests/testdata/regextst.txt
307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     A description of the test data format is included in that file.
307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst char *
308050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexTest::getPath(char buffer[2048], const char *filename) {
308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status=U_ZERO_ERROR;
308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *testDataDirectory = IntlTest::getSourceTestData(status);
308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("ERROR: loadTestData() failed - %s", u_errorName(status));
308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    strcpy(buffer, testDataDirectory);
308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    strcat(buffer, filename);
309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return buffer;
309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Extended() {
309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char tdd[2048];
309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *srcPath;
309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status  = U_ZERO_ERROR;
309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     lineNum = 0;
309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open and read the test data file.
310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    srcPath=getPath(tdd, "regextst.txt");
310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcPath==NULL) {
310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t    len;
310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Put the test data into a UnicodeString
311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString testString(FALSE, testData, len);
311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    flagsMat      (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   testPattern;   // The pattern for test from the test file.
312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   testFlags;     // the flags   for a test.
312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   matchString;   // The marked up string to be used as input
312650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)){
312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Construct RegexMatcher() error.");
312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete [] testData;
313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Loop over the test data file, once per line.
313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (lineMat.find()) {
313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lineNum++;
313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
313927f654740f2a26ad62a5c155af9199af9e69b889claireho          errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status));
314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString testLine = lineMat.group(1, status);
314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (testLine.length() == 0) {
314550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Parse the test line.  Skip blank and comment only lines.
315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Separate out the three main fields - pattern, flags, target.
315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        commentMat.reset(testLine);
315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (commentMat.lookingAt(status)) {
315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This line is a comment, or blank.
315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the pattern field, remove it from the test file line.
316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        quotedStuffMat.reset(testLine);
316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (quotedStuffMat.lookingAt(status)) {
316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testPattern = quotedStuffMat.group(2, status);
316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testLine.remove(0, quotedStuffMat.end(0, status));
316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
316727f654740f2a26ad62a5c155af9199af9e69b889claireho            errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum);
316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
317350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the flags from the test file line.
317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagsMat.reset(testLine);
317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagsMat.lookingAt(status);                  // Will always match, possibly an empty string.
317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testFlags = flagsMat.group(1, status);
317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagsMat.group(2, status).length() > 0) {
317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Bad Match flag at line %d. Scanning %c\n",
318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                lineNum, flagsMat.group(2, status).charAt(0));
318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testLine.remove(0, flagsMat.end(0, status));
318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the match string, as a whole.
318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    We'll process the <tags> later.
318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        quotedStuffMat.reset(testLine);
319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (quotedStuffMat.lookingAt(status)) {
319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            matchString = quotedStuffMat.group(2, status);
319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testLine.remove(0, quotedStuffMat.end(0, status));
319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Bad match string at test file line %d", lineNum);
319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  The only thing left from the input line should be an optional trailing comment.
320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        commentMat.reset(testLine);
320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (commentMat.lookingAt(status) == FALSE) {
320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Line %d: unexpected characters at end of test line.", lineNum);
320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Run the test
320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
321027f654740f2a26ad62a5c155af9199af9e69b889claireho        regex_find(testPattern, testFlags, matchString, srcPath, lineNum);
321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] testData;
321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    regex_find(pattern, flags, inputString, srcPath, lineNumber)
322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         Function to run a single test from the Extended (data driven) tests.
322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         See file test/testdata/regextst.txt for a description of the
322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         pattern and inputString fields, and the allowed flags.
322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         lineNumber is the source line in regextst.txt of the test.
322750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Set a value into a UVector at position specified by a decimal number in
323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   a UnicodeString.   This is a utility function needed by the actual test function,
323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   which follows.
323450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void set(UVector &vec, int32_t val, UnicodeString index) {
323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status=U_ZERO_ERROR;
323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  idx = 0;
323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int32_t i=0; i<index.length(); i++) {
323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t d=u_charDigitValue(index.charAt(i));
323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (d<0) {return;}
324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        idx = idx*10 + d;
324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (vec.size()<idx+1) {vec.addElement(-1, status);}
324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    vec.setElementAt(val, idx);
324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
324627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void setInt(UVector &vec, int32_t val, int32_t idx) {
324727f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode  status=U_ZERO_ERROR;
324827f654740f2a26ad62a5c155af9199af9e69b889claireho    while (vec.size()<idx+1) {vec.addElement(-1, status);}
324927f654740f2a26ad62a5c155af9199af9e69b889claireho    vec.setElementAt(val, idx);
325027f654740f2a26ad62a5c155af9199af9e69b889claireho}
325127f654740f2a26ad62a5c155af9199af9e69b889claireho
325227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex)
325327f654740f2a26ad62a5c155af9199af9e69b889claireho{
325427f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool couldFind = TRUE;
325527f654740f2a26ad62a5c155af9199af9e69b889claireho    UTEXT_SETNATIVEINDEX(utext, 0);
325627f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t i = 0;
325727f654740f2a26ad62a5c155af9199af9e69b889claireho    while (i < unistrOffset) {
325827f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 c = UTEXT_NEXT32(utext);
325927f654740f2a26ad62a5c155af9199af9e69b889claireho        if (c != U_SENTINEL) {
326027f654740f2a26ad62a5c155af9199af9e69b889claireho            i += U16_LENGTH(c);
326127f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
326227f654740f2a26ad62a5c155af9199af9e69b889claireho            couldFind = FALSE;
326327f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
326427f654740f2a26ad62a5c155af9199af9e69b889claireho        }
326527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
3266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext);
326727f654740f2a26ad62a5c155af9199af9e69b889claireho    return couldFind;
326827f654740f2a26ad62a5c155af9199af9e69b889claireho}
326927f654740f2a26ad62a5c155af9199af9e69b889claireho
327027f654740f2a26ad62a5c155af9199af9e69b889claireho
327150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::regex_find(const UnicodeString &pattern,
327250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           const UnicodeString &flags,
327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           const UnicodeString &inputString,
327427f654740f2a26ad62a5c155af9199af9e69b889claireho                           const char *srcPath,
327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           int32_t line) {
327650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString       unEscapedInput;
327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString       deTaggedInput;
327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             patternUTF8Length,      inputUTF8Length;
328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char                *patternChars  = NULL, *inputChars = NULL;
328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               patternText    = UTEXT_INITIALIZER;
328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               inputText      = UTEXT_INITIALIZER;
328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UConverter          *UTF8Converter = NULL;
328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status         = U_ZERO_ERROR;
328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *parsePat      = NULL;
328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *parseMatcher  = NULL;
328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *callerPattern = NULL, *UTF8Pattern = NULL;
329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *matcher       = NULL, *UTF8Matcher = NULL;
329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             groupStarts(status);
329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             groupEnds(status);
329327f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector             groupStartsUTF8(status);
329427f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector             groupEndsUTF8(status);
329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               isMatch        = FALSE, isUTF8Match = FALSE;
329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               failed         = FALSE;
329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             numFinds;
329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             i;
329950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               useMatchesFunc   = FALSE;
330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               useLookingAtFunc = FALSE;
330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             regionStart      = -1;
330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             regionEnd        = -1;
330327f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t             regionStartUTF8  = -1;
330427f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t             regionEndUTF8    = -1;
330527f654740f2a26ad62a5c155af9199af9e69b889claireho
330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Compile the caller's pattern
330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t bflags = 0;
331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x69) >= 0)  { // 'i' flag
331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_CASE_INSENSITIVE;
331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x78) >= 0)  { // 'x' flag
331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_COMMENTS;
331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x73) >= 0)  { // 's' flag
331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_DOTALL;
331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x6d) >= 0)  { // 'm' flag
332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_MULTILINE;
332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_UNIX_LINES;
332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callerPattern = RegexPattern::compile(pattern, bflags, pe, status);
333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status != U_ZERO_ERROR) {
333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #if UCONFIG_NO_BREAK_ITERATION==1
333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // 'v' test flag means that the test pattern should not compile if ICU was configured
333650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #endif
334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Expected pattern compilation error.
334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                logln("Pattern Compile returns \"%s\"", u_errorName(status));
334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Unexpected pattern compilation error.
3349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status));
335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTF8Converter = ucnv_open("UTF8", &status);
335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR; // buffer overflow
335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patternChars = new char[patternUTF8Length+1];
336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status == U_ZERO_ERROR) {
336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status != U_ZERO_ERROR) {
336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==1
336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // 'v' test flag means that the test pattern should not compile if ICU was configured
336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Expected pattern compilation error.
337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status));
337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Unexpected pattern compilation error.
338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: error %s compiling pattern. (UTF8)", line, u_errorName(status));
338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTF8Pattern == NULL) {
338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
339027f654740f2a26ad62a5c155af9199af9e69b889claireho        logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x64) >= 0) {  // 'd' flag
339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPatternDump(callerPattern);
339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x45) >= 0) {  // 'E' flag
339927f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line);
340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Number of times find() should be called on the test string, default to 1
340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    numFinds = 1;
340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=2; i<=9; i++) {
340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)(0x30 + i)) >= 0) {   // digit flag
341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (numFinds != 1) {
341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: more than one digit flag.  Scanning %d.", line, i);
341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            numFinds = i;
341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // 'M' flag.  Use matches() instead of find()
341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x4d) >= 0) {
342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        useMatchesFunc = TRUE;
342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x4c) >= 0) {
342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        useLookingAtFunc = TRUE;
342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Find the tags in the input data, remove them, and record the group boundary
342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    positions.
342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status);
343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    unEscapedInput = inputString.unescape();
343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parseMatcher = parsePat->matcher(unEscapedInput, status);
343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(parseMatcher->find()) {
343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        parseMatcher->appendReplacement(deTaggedInput, "", status);
343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString groupNum = parseMatcher->group(2, status);
344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (groupNum == "r") {
344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // <r> or </r>, a region specification within the string
344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (parseMatcher->group(1, status) == "/") {
344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                regionEnd = deTaggedInput.length();
344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                regionStart = deTaggedInput.length();
344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // <digits> or </digits>, a group match boundary tag.
344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (parseMatcher->group(1, status) == "/") {
345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                set(groupEnds, deTaggedInput.length(), groupNum);
345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                set(groupStarts, deTaggedInput.length(), groupNum);
345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parseMatcher->appendTail(deTaggedInput);
345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      errln("mismatched <r> tags");
346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      failed = TRUE;
346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      goto cleanupAndReturn;
346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Configure the matcher according to the flags specified with this test.
346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher = callerPattern->matcher(deTaggedInput, status);
346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->setTrace(TRUE);
347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTF8Pattern != NULL) {
347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR; // buffer overflow
347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputChars = new char[inputUTF8Length+1];
347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status);
347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);
347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_ZERO_ERROR) {
3481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS_L(line);
348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher == NULL) {
348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
348727f654740f2a26ad62a5c155af9199af9e69b889claireho          logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
349227f654740f2a26ad62a5c155af9199af9e69b889claireho    //
349327f654740f2a26ad62a5c155af9199af9e69b889claireho    //  Generate native indices for UTF8 versions of region and capture group info
349427f654740f2a26ad62a5c155af9199af9e69b889claireho    //
349527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (UTF8Matcher != NULL) {
349627f654740f2a26ad62a5c155af9199af9e69b889claireho        if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
349727f654740f2a26ad62a5c155af9199af9e69b889claireho        if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
349827f654740f2a26ad62a5c155af9199af9e69b889claireho
349927f654740f2a26ad62a5c155af9199af9e69b889claireho        //  Fill out the native index UVector info.
350027f654740f2a26ad62a5c155af9199af9e69b889claireho        //  Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
350127f654740f2a26ad62a5c155af9199af9e69b889claireho        for (i=0; i<groupStarts.size(); i++) {
350227f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t  start = groupStarts.elementAti(i);
350327f654740f2a26ad62a5c155af9199af9e69b889claireho            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
350427f654740f2a26ad62a5c155af9199af9e69b889claireho            if (start >= 0) {
350527f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t  startUTF8;
350627f654740f2a26ad62a5c155af9199af9e69b889claireho                if (!utextOffsetToNative(&inputText, start, startUTF8)) {
350727f654740f2a26ad62a5c155af9199af9e69b889claireho                    errln("Error at line %d: could not find native index for group start %d.  UTF16 index %d", line, i, start);
350827f654740f2a26ad62a5c155af9199af9e69b889claireho                    failed = TRUE;
350927f654740f2a26ad62a5c155af9199af9e69b889claireho                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
351027f654740f2a26ad62a5c155af9199af9e69b889claireho                }
351127f654740f2a26ad62a5c155af9199af9e69b889claireho                setInt(groupStartsUTF8, startUTF8, i);
351227f654740f2a26ad62a5c155af9199af9e69b889claireho            }
351327f654740f2a26ad62a5c155af9199af9e69b889claireho
351427f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t  end = groupEnds.elementAti(i);
351527f654740f2a26ad62a5c155af9199af9e69b889claireho            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
351627f654740f2a26ad62a5c155af9199af9e69b889claireho            if (end >= 0) {
351727f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t  endUTF8;
351827f654740f2a26ad62a5c155af9199af9e69b889claireho                if (!utextOffsetToNative(&inputText, end, endUTF8)) {
351927f654740f2a26ad62a5c155af9199af9e69b889claireho                    errln("Error at line %d: could not find native index for group end %d.  UTF16 index %d", line, i, end);
352027f654740f2a26ad62a5c155af9199af9e69b889claireho                    failed = TRUE;
352127f654740f2a26ad62a5c155af9199af9e69b889claireho                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
352227f654740f2a26ad62a5c155af9199af9e69b889claireho                }
352327f654740f2a26ad62a5c155af9199af9e69b889claireho                setInt(groupEndsUTF8, endUTF8, i);
352427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
352527f654740f2a26ad62a5c155af9199af9e69b889claireho        }
352627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
352727f654740f2a26ad62a5c155af9199af9e69b889claireho
352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (regionStart>=0) {
352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       matcher->region(regionStart, regionEnd, status);
353050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       REGEX_CHECK_STATUS_L(line);
353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       if (UTF8Matcher != NULL) {
353227f654740f2a26ad62a5c155af9199af9e69b889claireho           UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status);
353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           REGEX_CHECK_STATUS_L(line);
353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       }
353550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
353650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x61) >= 0) {   //  'a' anchoring bounds flag
353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->useAnchoringBounds(FALSE);
353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher != NULL) {
353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTF8Matcher->useAnchoringBounds(FALSE);
354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x62) >= 0) {   //  'b' transparent bounds flag
354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->useTransparentBounds(TRUE);
354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher != NULL) {
354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTF8Matcher->useTransparentBounds(TRUE);
354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Do a find on the de-tagged input using the caller's pattern
355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     TODO: error on count>1 and not find().
355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //           error on both matches() and lookingAt().
355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<numFinds; i++) {
355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (useMatchesFunc) {
355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->matches(status);
355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               isUTF8Match = UTF8Matcher->matches(status);
356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else  if (useLookingAtFunc) {
356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->lookingAt(status);
356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isUTF8Match = UTF8Matcher->lookingAt(status);
356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->find();
356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isUTF8Match = UTF8Matcher->find();
357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->setTrace(FALSE);
357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Match up the groups from the find() with the groups from the tags
357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // number of tags should match number of groups from find operation.
358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // matcher->groupCount does not include group 0, the entire match, hence the +1.
358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   G option in test means that capture group data is not available in the
358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     expected results, so the check needs to be suppressed.
358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (isMatch == FALSE && groupStarts.size() != 0) {
3585b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        dataerrln("Error at line %d:  Match expected, but none found.", line);
358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) {
358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d:  Match expected, but none found. (UTF8)", line);
359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x47 /*G*/) >= 0) {
359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Only check for match / no match.  Don't check capture groups.
359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (isMatch && groupStarts.size() == 0) {
359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d:  No match expected, but one found.", line);
359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) {
360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d:  No match expected, but one found. (UTF8)", line);
360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<=matcher->groupCount(); i++) {
360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t  expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i));
360927f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t  expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i));
361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (matcher->start(i, status) != expectedStart) {
361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d",
361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                line, i, expectedStart, matcher->start(i, status));
361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
361527f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {
361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d (UTF8)",
361727f654740f2a26ad62a5c155af9199af9e69b889claireho                  line, i, expectedStartUTF8, UTF8Matcher->start(i, status));
361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t  expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
362327f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t  expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (matcher->end(i, status) != expectedEnd) {
362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d",
362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                line, i, expectedEnd, matcher->end(i, status));
362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Error on end position;  keep going; real error is probably yet to come as group
362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   end positions work from end of the input data towards the front.
363027f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {
363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d (UTF8)",
363227f654740f2a26ad62a5c155af9199af9e69b889claireho                  line, i, expectedEndUTF8, UTF8Matcher->end(i, status));
363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Error on end position;  keep going; real error is probably yet to come as group
363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   end positions work from end of the input data towards the front.
363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ( matcher->groupCount()+1 < groupStarts.size()) {
363950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: Expected %d capture groups, found %d.",
364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, groupStarts.size()-1, matcher->groupCount());
364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) {
364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: Expected %d capture groups, found %d. (UTF8)",
364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              line, groupStarts.size()-1, UTF8Matcher->groupCount());
364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->requireEnd() == TRUE) {
365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE", line);
365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Matcher->requireEnd() == TRUE) {
365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE (UTF8)", line);
365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->requireEnd() == FALSE) {
366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Matcher->requireEnd() == FALSE) {
366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE (UTF8)", line);
366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->hitEnd() == TRUE) {
367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTF8Matcher->hitEnd() == TRUE) {
367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE (UTF8)", line);
367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->hitEnd() == FALSE) {
368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTF8Matcher->hitEnd() == FALSE) {
368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE (UTF8)", line);
368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
369050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanupAndReturn:
369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (failed) {
369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        infoln((UnicodeString)"\""+pattern+(UnicodeString)"\"  "
369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            +flags+(UnicodeString)"  \""+inputString+(UnicodeString)"\"");
369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // callerPattern->dump();
369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete parseMatcher;
369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete parsePat;
369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete UTF8Matcher;
369950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete UTF8Pattern;
370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher;
370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete callerPattern;
370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] inputChars;
370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] patternChars;
370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_close(UTF8Converter);
370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      Errors     Check for error handling in patterns.
371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
//      The body is a list of deliberately malformed patterns, one per
//      REGEX_ERR line, plus one block that compiles a valid pattern with a
//      combination of option flags and asserts the resulting status.
//
//      NOTE(review): REGEX_ERR and REGEX_ASSERT are defined outside this
//      section.  The three values after each pattern look like the expected
//      error line, the expected error column and the expected UErrorCode
//      (for example "abc\ndef(*2)" is listed with line 2, column 5) --
//      confirm against the macro definition.
//
371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
371850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Errors() {
371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // \escape sequences that aren't implemented yet.
372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
    //   (The check above is commented out, so nothing in this section runs.)
372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Missing close parentheses
    //   Covers an unterminated (?# comment, a capturing group and a (?: group.
372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN);
372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN);
372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN);
372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Extra close paren
    //   Note: the last pattern in this group consists only of unclosed open
    //   parens, despite the section heading.
372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN);
372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN);
373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN);
373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Look-ahead, Look-behind
373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  TODO:  add tests for unbounded length look-behinds.
373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX);       // illegal construct
373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Attempt to use non-default flags
    //   Compiles the valid pattern ".*" with four option flags OR-ed together
    //   and asserts that the status comes back as U_REGEX_UNIMPLEMENTED.
    //   NOTE(review): presumably UREGEX_CANON_EQ is the option that is not
    //   implemented -- confirm against the RegexPattern::compile documentation.
    //   The braces give pe/status/flags/pat1 their own scope.
373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError   pe;
373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode    status = U_ZERO_ERROR;
374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t       flags  = UREGEX_CANON_EQ |
374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UREGEX_COMMENTS         | UREGEX_DOTALL   |
374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UREGEX_MULTILINE;
374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status);
374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED);
        // pat1 is released unconditionally; delete on a null pointer is a
        // no-op in C++, so this is safe whatever compile() returned.
374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat1;
374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Quantifiers are allowed only after something that can be quantified.
    //   The second pattern contains a newline, so its '*' sits on line 2.
375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX);
375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);
375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);
375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Mal-formed {min,max} quantifiers
    //   Three failure kinds are listed: non-numeric or badly punctuated
    //   bounds (U_REGEX_BAD_INTERVAL), max smaller than min
    //   (U_REGEX_MAX_LT_MIN) and bounds that are too large
    //   (U_REGEX_NUMBER_TOO_BIG).
375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL);
375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN);
375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL);
375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);
375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);
376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);
376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG);        // Overflows int during scan
376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG);          // Overflows regex binary format
376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);
376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Ticket 5389
    //   A pattern that begins with a quantifier.
376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);
376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Invalid Back Reference \0
376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    For ICU 3.8 and earlier
377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    For ICU versions newer than 3.8, \0 introduces an octal escape.
377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
    //   The C++ literal below spells the regex (ab) followed by backslash-zero
    //   with no digits after it; the listed status is
    //   U_REGEX_BAD_ESCAPE_SEQUENCE.
377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);
377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Read a text data file, convert it to UChars, and return the data
378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    in one big UChar * buffer, which the caller must delete.
378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
378350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                     const char *defEncoding, UErrorCode &status) {
378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar       *retPtr  = NULL;
378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char        *fileBuf = NULL;
378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UConverter* conv     = NULL;
378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FILE        *f       = NULL;
378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ulen = 0;
379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return retPtr;
379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open the file.
379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    f = fopen(fileName, "rb");
379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (f == 0) {
380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Error opening test data file %s\n", fileName);
380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_FILE_ACCESS_ERROR;
380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Read it in
380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t            fileSize;
380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t            amt_read;
380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek( f, 0, SEEK_END);
381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileSize = ftell(f);
381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileBuf = new char[fileSize];
381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f, 0, SEEK_SET);
381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    amt_read = fread(fileBuf, 1, fileSize, f);
381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (amt_read != fileSize || fileSize <= 0) {
381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error reading test data file.");
381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanUpAndReturn;
381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Look for a Unicode Signature (BOM) on the data just read
382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t        signatureLength;
382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *   fileBufC;
382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char*    encoding;
382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileBufC = fileBuf;
382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    encoding = ucnv_detectUnicodeSignature(
382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBuf, fileSize, &signatureLength, &status);
383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(encoding!=NULL ){
383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBufC  += signatureLength;
383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileSize  -= signatureLength;
383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        encoding = defEncoding;
383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (strcmp(encoding, "utf-8") == 0) {
383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("file %s is missing its BOM", fileName);
383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Open a converter to take the rule file to UTF-16
384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    conv = ucnv_open(encoding, &status);
384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanUpAndReturn;
384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Convert the rules to UChar.
385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Preflight first to determine required buffer size.
385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ulen = ucnv_toUChars(conv,
385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        NULL,           //  dest,
385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        0,              //  destCapacity,
385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBufC,
385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileSize,
385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        &status);
385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status == U_BUFFER_OVERFLOW_ERROR) {
385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Buffer Overflow is expected from the preflight operation.
386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retPtr = new UChar[ulen+1];
386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ucnv_toUChars(conv,
386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            retPtr,       //  dest,
386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ulen+1,
386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fileBufC,
386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fileSize,
386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            &status);
386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
387150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanUpAndReturn:
387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fclose(f);
387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] fileBuf;
387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_close(conv);
387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
3877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        delete []retPtr;
387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retPtr = 0;
387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ulen   = 0;
388050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retPtr;
388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
388650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   PerlTests  - Run Perl's regular expression tests
388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                The input file for this test is re_tests, the standard regular
388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                expression test data distributed with the Perl source code.
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                Here is Perl's description of the test data file:
389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # The tests are in a separate file 't/op/re_tests'.
389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Each line in that file is a separate test.
389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # There are five columns, separated by tabs.
389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 1 contains the pattern, optionally enclosed in C<''>.
389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Modifiers can be put after the closing C<'>.
389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 2 contains the string to be matched.
390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 3 contains the expected result:
390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     y   expect a match
390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     n   expect no match
390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     c   expect an error
390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # B   test exposes a known bug in Perl, should be skipped
390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # b   test exposes a known bug in Perl, should be skipped if noamp
390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 4 contains a string, usually C<$&>.
391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 5 contains the expected result of double-quote
391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # interpolating that string after the match, or start of error message.
391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 6, if present, contains a reason why the test is skipped.
391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # This is printed with "skipped", for harness to pick up.
391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # \n in the tests are interpolated, as are variables of the form ${\w+}.
392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # If you want to add a regular expression test that can't be expressed
392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # in this format, don't add it here: put it in op/pat.t instead.
392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        For ICU, if field 3 contains an 'i', the test will be skipped.
//        The test exposes some known incompatibility between ICU and Perl regexps.
392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        (The i is in addition to whatever was there before.)
392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
392950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTests() {
393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char tdd[2048];
393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *srcPath;
393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status = U_ZERO_ERROR;
393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError pe;
393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open and read the test data file.
393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    srcPath=getPath(tdd, "re_tests.txt");
393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcPath==NULL) {
394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t    len;
394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
394850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Put the test data into a UnicodeString
395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString testDataString(FALSE, testData, len);
395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to break the input file into lines, and strip the new lines.
395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     One line per match, capture group one is the desired data.
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("RegexPattern::compile() error");
396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to split a test file line into fields.
396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    There are six fields, separated by tabs.
396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to identify test patterns with flag settings, and to separate them.
397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    Test patterns with flags look like 'pattern'i
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    Test patterns without flags are not quoted:   pattern
397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher* flagMat = flagPat->matcher(status);
397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The Perl tests reference several perl-isms, which are evaluated/substituted
398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   in the test data.  Not being perl, this must be done explicitly.  Here
398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   are string constants and REs for these constructs.
398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString nulnulSrc("${nulnul}");
398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    nulnul = nulnul.unescape();
398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString ffffSrc("${ffff}");
399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString ffff("\\uffff", -1, US_INV);
399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ffff = ffff.unescape();
399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  regexp for $-[0], $+[2], etc.
399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher *groupsMat = groupsPat->matcher(status);
399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  regexp for $0, $1, $2, etc.
399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher *cgMat = cgPat->matcher(status);
400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Main Loop for the Perl Tests, runs once per line from the
400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   test data file.
400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  lineNum = 0;
400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  skippedUnimplementedCount = 0;
400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (lineMat->find()) {
400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lineNum++;
401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Get a line, break it into its fields, do the Perl
401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    variable substitutions.
401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString line = lineMat->group(1, status);
401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString fields[7];
401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fieldPat->split(line, fields, 7, status);
401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagMat->reset(fields[0]);
402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagMat->matches(status);
402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString pattern  = flagMat->group(2, status);
402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace("${bang}", "!");
402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace(ffffSrc, ffff);
402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Identify patterns that include match flag settings,
402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    split off the flags, remove the extra quotes.
402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString flagStr = flagMat->group(3, status);
403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return;
403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t flags = 0;
403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_m = 0x6d;
403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_x = 0x78;
404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_y = 0x79;
404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_i) != -1) {
404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_CASE_INSENSITIVE;
404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_m) != -1) {
404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_MULTILINE;
404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_x) != -1) {
404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_COMMENTS;
404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Compile the test pattern.
405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status);
405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_REGEX_UNIMPLEMENTED) {
405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //
405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Test of a feature that is planned for ICU, but not yet implemented.
405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   skip the test.
406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skippedUnimplementedCount++;
406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Some tests are supposed to generate errors.
406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   Only report an error for tests that are supposed to succeed.
406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_i) >= 0) {
408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // ICU should skip this test.
408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_c) >= 0) {
408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This pattern should have caused a compilation error, but didn't/
408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("line %d: Expected a pattern compile error, got success.", lineNum);
408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // replace the Perl variables that appear in some of the
409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   match data strings.
409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString matchString = fields[1];
409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(nulnulSrc, nulnul);
409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(ffffSrc,   ffff);
409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Replace any \n in the match string with an actual new-line char.
410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Don't do full unescape, as this unescapes more than Perl does, which
410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  causes other spurious failures in the tests.
410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Run the test, check for expected match/don't match result.
410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher *testMat = testPat->matcher(matchString, status);
411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool found = testMat->find();
411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool expected = FALSE;
411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_y) >=0) {
411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            expected = TRUE;
411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (expected != found) {
411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("line %d: Expected %smatch, got %smatch",
411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                lineNum, expected?"":"no ", found?"":"no " );
411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Don't try to check expected results if there is no match.
412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   (Some have stuff in the expected fields)
412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!found) {
412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testMat;
412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Interpret the Perl expression from the fourth field of the data file,
413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // building up an ICU string from the results of the ICU match.
413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   The Perl expression will contain references to the results of
413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     a regex match, including the matched string, capture group strings,
413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     group starting and ending indicies, etc.
413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString resultString;
413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString perlExpr = fields[3];
413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if SUPPORT_MUTATING_INPUT_STRING
414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        groupsMat->reset(perlExpr);
414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cgMat->reset(perlExpr);
414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (perlExpr.length() > 0) {
414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !SUPPORT_MUTATING_INPUT_STRING
414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Perferred usage.  Reset after any modification to input string.
414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            groupsMat->reset(perlExpr);
414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cgMat->reset(perlExpr);
414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (perlExpr.startsWith("$&")) {
415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(testMat->group(status));
415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (groupsMat->lookingAt(status)) {
415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // $-[0]   $+[2]  etc.
415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString digitString = groupsMat->group(2, status);
415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t t = 0;
416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString plusOrMinus = groupsMat->group(1, status);
416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t matchPosition;
416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (plusOrMinus.compare("+") == 0) {
416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    matchPosition = testMat->end(groupNum, status);
416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    matchPosition = testMat->start(groupNum, status);
416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (matchPosition != -1) {
416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, matchPosition);
417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, groupsMat->end(status));
417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (cgMat->lookingAt(status)) {
417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // $1, $2, $3, etc.
417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString digitString = cgMat->group(1, status);
417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t t = 0;
417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_SUCCESS(status)) {
418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    resultString.append(testMat->group(groupNum, status));
418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    status = U_ZERO_ERROR;
418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, cgMat->end(status));
418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith("@-")) {
418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<=testMat->groupCount(); i++) {
418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (i>0) {
419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        resultString.append(" ");
419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith("@+")) {
419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<=testMat->groupCount(); i++) {
420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (i>0) {
420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        resultString.append(" ");
420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                     //           or as an escaped sequence (e.g. \n)
421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (perlExpr.length() > 1) {
421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c = perlExpr.charAt(0);
421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                switch (c) {
421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                case 'n':   c = '\n'; break;
421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // add any other escape sequences that show up in the test expected results.
421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(c);
421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 1);
422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else  {
422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Any characters from the perl expression that we don't explicitly
422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  recognize before here are assumed to be literals and copied
422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  as-is to the expected results.
422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(perlExpr.charAt(0));
422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 1);
422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(status)) {
423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Expected Results Compare
423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString expectedS(fields[4]);
424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(nulnulSrc, nulnul);
424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(ffffSrc,   ffff);
424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (expectedS.compare(resultString) != 0) {
424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            err("Line %d: Incorrect perl expression results.", lineNum);
424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete testMat;
425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete testPat;
4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // All done.  Clean up allocated stuff.
4256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete cgMat;
425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete cgPat;
4259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete groupsMat;
426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete groupsPat;
4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete flagMat;
426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete flagPat;
4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete lineMat;
426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete linePat;
4268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete fieldPat;
427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] testData;
427150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------
4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   PerlTestsUTF8  Run Perl's regular expression tests on UTF-8-based UTexts
428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  (instead of using UnicodeStrings) to test the alternate engine.
428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  The input file for this test is re_tests, the standard regular
428350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  expression test data distributed with the Perl source code.
428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  See PerlTests() for more information.
4285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
4286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------
428750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTestsUTF8() {
4288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char tdd[2048];
4289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *srcPath;
4290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
4291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError pe;
429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status));
429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText       patternText = UTEXT_INITIALIZER;
429450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char       *patternChars = NULL;
429550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     patternLength;
429650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     patternCapacity = 0;
429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText       inputText = UTEXT_INITIALIZER;
429850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char       *inputChars = NULL;
429950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     inputLength;
430050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     inputCapacity = 0;
430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
4303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Open and read the test data file.
4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    srcPath=getPath(tdd, "re_tests.txt");
4308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcPath==NULL) {
4309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return; /* something went wrong, error already output */
4310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t    len;
4313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
4314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return; /* something went wrong, error already output */
4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Put the test data into a UnicodeString
4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString testDataString(FALSE, testData, len);
4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to break the input file into lines, and strip the new lines.
4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     One line per match, capture group one is the desired data.
4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        dataerrln("RegexPattern::compile() error");
4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to split a test file line into fields.
4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    There are six fields, separated by tabs.
4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
4339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to identify test patterns with flag settings, and to separate them.
4342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Test patterns with flags look like 'pattern'i
4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Test patterns without flags are not quoted:   pattern
4344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
4347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher* flagMat = flagPat->matcher(status);
4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The Perl tests reference several perl-isms, which are evaluated/substituted
4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   in the test data.  Not being perl, this must be done explicitly.  Here
4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   are string constants and REs for these constructs.
4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString nulnulSrc("${nulnul}");
4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
4356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    nulnul = nulnul.unescape();
4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString ffffSrc("${ffff}");
4359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString ffff("\\uffff", -1, US_INV);
4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ffff = ffff.unescape();
4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  regexp for $-[0], $+[2], etc.
4363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
4364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *groupsMat = groupsPat->matcher(status);
4365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  regexp for $0, $1, $2, etc.
4367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
4368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *cgMat = cgPat->matcher(status);
4369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Main Loop for the Perl Tests, runs once per line from the
4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   test data file.
4374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  lineNum = 0;
4376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  skippedUnimplementedCount = 0;
4377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (lineMat->find()) {
4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lineNum++;
4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Get a line, break it into its fields, do the Perl
4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    variable substitutions.
4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString line = lineMat->group(1, status);
4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString fields[7];
4386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fieldPat->split(line, fields, 7, status);
4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flagMat->reset(fields[0]);
4389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flagMat->matches(status);
4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString pattern  = flagMat->group(2, status);
4391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.findAndReplace("${bang}", "!");
4392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
4393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.findAndReplace(ffffSrc, ffff);
4394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Identify patterns that include match flag settings,
4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    split off the flags, remove the extra quotes.
4398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString flagStr = flagMat->group(3, status);
4400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
4401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return;
4403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t flags = 0;
4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_m = 0x6d;
4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_x = 0x78;
4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_y = 0x79;
4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_i) != -1) {
4411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_CASE_INSENSITIVE;
4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_m) != -1) {
4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_MULTILINE;
4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_x) != -1) {
4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_COMMENTS;
4418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
441950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
442050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
442150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Put the pattern in a UTF-8 UText
442250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
442350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
442450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
442550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_BUFFER_OVERFLOW_ERROR) {
442650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
442750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete[] patternChars;
442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            patternCapacity = patternLength + 1;
442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            patternChars = new char[patternCapacity];
443050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
443150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
443250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&patternText, patternChars, patternLength, &status);
4433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Compile the test pattern.
4436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status);
4438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (status == U_REGEX_UNIMPLEMENTED) {
4439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //
4440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Test of a feature that is planned for ICU, but not yet implemented.
4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   skip the test.
4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            skippedUnimplementedCount++;
4443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
4449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Some tests are supposed to generate errors.
4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   Only report an error for tests that are supposed to succeed.
4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
4455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_i) >= 0) {
4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // ICU should skip this test.
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_c) >= 0) {
4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This pattern should have caused a compilation error, but didn't.
4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("line %d: Expected a pattern compile error, got success.", lineNum);
4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
447450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // replace the Perl variables that appear in some of the
4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   match data strings.
4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString matchString = fields[1];
4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matchString.findAndReplace(nulnulSrc, nulnul);
4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matchString.findAndReplace(ffffSrc,   ffff);
4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Replace any \n in the match string with an actual new-line char.
4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Don't do full unescape, as this unescapes more than Perl does, which
4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  causes other spurious failures in the tests.
4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
448850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
448950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Put the input in a UTF-8 UText
449050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
449150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
449250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
449350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_BUFFER_OVERFLOW_ERROR) {
449450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
449550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete[] inputChars;
449650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            inputCapacity = inputLength + 1;
449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            inputChars = new char[inputCapacity];
449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&inputText, inputChars, inputLength, &status);
4501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Run the test, check for expected match/don't match result.
4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText);
4506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool found = testMat->find();
4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool expected = FALSE;
4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_y) >=0) {
4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expected = TRUE;
4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expected != found) {
4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("line %d: Expected %smatch, got %smatch",
4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lineNum, expected?"":"no ", found?"":"no " );
4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Don't try to check expected results if there is no match.
4518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   (Some have stuff in the expected fields)
4519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (!found) {
4520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            delete testMat;
4521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            delete testPat;
4522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            continue;
4523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Interpret the Perl expression from the fourth field of the data file,
4527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // building up an ICU string from the results of the ICU match.
4528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   The Perl expression will contain references to the results of
4529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //     a regex match, including the matched string, capture group strings,
4530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //     group starting and ending indicies, etc.
4531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString resultString;
4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString perlExpr = fields[3];
4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (perlExpr.length() > 0) {
453650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            groupsMat->reset(perlExpr);
453750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cgMat->reset(perlExpr);
453850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (perlExpr.startsWith("$&")) {
4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(testMat->group(status));
4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (groupsMat->lookingAt(status)) {
4545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // $-[0]   $+[2]  etc.
4546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString digitString = groupsMat->group(2, status);
4547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t t = 0;
4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString plusOrMinus = groupsMat->group(1, status);
4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t matchPosition;
4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (plusOrMinus.compare("+") == 0) {
4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    matchPosition = testMat->end(groupNum, status);
4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
4554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    matchPosition = testMat->start(groupNum, status);
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (matchPosition != -1) {
4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, matchPosition);
4558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, groupsMat->end(status));
4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (cgMat->lookingAt(status)) {
4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // $1, $2, $3, etc.
4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString digitString = cgMat->group(1, status);
4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t t = 0;
4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_SUCCESS(status)) {
4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    resultString.append(testMat->group(groupNum, status));
4569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    status = U_ZERO_ERROR;
4570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, cgMat->end(status));
4572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (perlExpr.startsWith("@-")) {
4575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
4576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i=0; i<=testMat->groupCount(); i++) {
4577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i>0) {
4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        resultString.append(" ");
4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (perlExpr.startsWith("@+")) {
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i=0; i<=testMat->groupCount(); i++) {
4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i>0) {
4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        resultString.append(" ");
4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                     //           or as an escaped sequence (e.g. \n)
4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (perlExpr.length() > 1) {
4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar c = perlExpr.charAt(0);
4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                switch (c) {
4603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'n':   c = '\n'; break;
4604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // add any other escape sequences that show up in the test expected results.
4605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(c);
4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 1);
4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else  {
4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Any characters from the perl expression that we don't explicitly
4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  recognize before here are assumed to be literals and copied
4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  as-is to the expected results.
4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(perlExpr.charAt(0));
4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 1);
4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(status)) {
4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Expected Results Compare
4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString expectedS(fields[4]);
4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectedS.findAndReplace(nulnulSrc, nulnul);
4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectedS.findAndReplace(ffffSrc,   ffff);
4630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expectedS.compare(resultString) != 0) {
4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            err("Line %d: Incorrect perl expression results.", lineNum);
463550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testMat;
4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testPat;
4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // All done.  Clean up allocated stuff.
4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cgMat;
4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cgPat;
4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete groupsMat;
4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete groupsPat;
4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete flagMat;
4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete flagPat;
4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete lineMat;
4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete linePat;
4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fieldPat;
4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete [] testData;
465950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] patternChars;
466450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] inputChars;
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
4668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------
4673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
4674b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//  Bug6149   Verify limits to heap expansion for backtrack stack.
4675b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//             Use this pattern,
4676b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 "(a?){1,}"
4677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//             The zero-length match will repeat forever.
4678b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                (That this goes into a loop is another bug)
4679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
4680b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//---------------------------------------------------------------
4681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid RegexTest::Bug6149() {
4682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString pattern("(a?){1,}");
4683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString s("xyz");
4684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    uint32_t flags = 0;
4685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
4686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    RegexMatcher  matcher(pattern, s, flags, status);
4688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UBool result = false;
4689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW);
4690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    REGEX_ASSERT(result == FALSE);
4691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }
4692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
//
//   Callbacks()    Test the callback function.
//                  When set, callbacks occur periodically during matching operations,
//                  giving the application code the ability to abort the operation
//                  before its normal completion.
//
4700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct callBackContext {
4702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexTest        *test;
4703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          maxCalls;
4704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          numCalls;
4705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          lastSteps;
4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;};
4707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru};
4708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_BEGIN
4710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic UBool U_CALLCONV
4711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QuerutestCallBackFn(const void *context, int32_t steps) {
4712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    callBackContext  *info = (callBackContext *)context;
4713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (info->lastSteps+1 != steps) {
4714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        info->test->errln("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
4715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    info->lastSteps = steps;
4717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    info->numCalls++;
4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (info->numCalls < info->maxCalls);
4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
4720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_END
4721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexTest::Callbacks() {
4723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   {
4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Getter returns NULLs if no callback has been set
4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   The variables that the getter will fill in.
4727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   Init to non-null values so that the action of the getter can be seen.
4728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const void          *returnedContext = &returnedContext;
4729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        URegexMatchCallback *returnedFn = &testCallBackFn;
4730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
4732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("x", 0, status);
4733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.getMatchCallback(returnedFn, returnedContext, status);
4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedFn == NULL);
4737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedContext == NULL);
4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   {
4741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Set and Get work
4742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        callBackContext cbInfo = {this, 0, 0, 0};
4743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const void          *returnedContext;
4744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        URegexMatchCallback *returnedFn;
4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
4747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
4749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.getMatchCallback(returnedFn, returnedContext, status);
4751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedFn == testCallBackFn);
4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedContext == &cbInfo);
4754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A short-running match shouldn't invoke the callback
4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(1);
4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString s = "xxx";
4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status));
4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls == 0);
4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A medium-length match that runs long enough to invoke the
4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   callback, but not so long that the callback aborts it.
4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(4);
4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = "aaaaaaaaaaaaaaaaaaab";
4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status)==FALSE);
4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls > 0);
4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A longer running match that the callback function will abort.
4775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
4776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(4);
4777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = "aaaaaaaaaaaaaaaaaaaaaaab";
4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status)==FALSE);
4780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
4781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls == 4);
4782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
4786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
//
//   FindProgressCallbacks()    Test the find "progress" callback function.
//                  When set, the find progress callback will be invoked during a find operation
//                  after each return from a match attempt, giving the application the opportunity
//                  to terminate a long-running find operation before its normal completion.
//
479427f654740f2a26ad62a5c155af9199af9e69b889claireho
479527f654740f2a26ad62a5c155af9199af9e69b889clairehostruct progressCallBackContext {
479627f654740f2a26ad62a5c155af9199af9e69b889claireho    RegexTest        *test;
479727f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t          lastIndex;
479827f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t          maxCalls;
479927f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t          numCalls;
480027f654740f2a26ad62a5c155af9199af9e69b889claireho    void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
480127f654740f2a26ad62a5c155af9199af9e69b889claireho};
480227f654740f2a26ad62a5c155af9199af9e69b889claireho
480327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN
480427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV
480527f654740f2a26ad62a5c155af9199af9e69b889clairehotestProgressCallBackFn(const void *context, int64_t matchIndex) {
480627f654740f2a26ad62a5c155af9199af9e69b889claireho    progressCallBackContext  *info = (progressCallBackContext *)context;
480727f654740f2a26ad62a5c155af9199af9e69b889claireho    info->numCalls++;
480827f654740f2a26ad62a5c155af9199af9e69b889claireho    info->lastIndex = matchIndex;
480927f654740f2a26ad62a5c155af9199af9e69b889claireho//    info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);
481027f654740f2a26ad62a5c155af9199af9e69b889claireho    return (info->numCalls < info->maxCalls);
481127f654740f2a26ad62a5c155af9199af9e69b889claireho}
481227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END
481327f654740f2a26ad62a5c155af9199af9e69b889claireho
481427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::FindProgressCallbacks() {
481527f654740f2a26ad62a5c155af9199af9e69b889claireho   {
481627f654740f2a26ad62a5c155af9199af9e69b889claireho        // Getter returns NULLs if no callback has been set
481727f654740f2a26ad62a5c155af9199af9e69b889claireho
481827f654740f2a26ad62a5c155af9199af9e69b889claireho        //   The variables that the getter will fill in.
481927f654740f2a26ad62a5c155af9199af9e69b889claireho        //   Init to non-null values so that the action of the getter can be seen.
482027f654740f2a26ad62a5c155af9199af9e69b889claireho        const void                  *returnedContext = &returnedContext;
482127f654740f2a26ad62a5c155af9199af9e69b889claireho        URegexFindProgressCallback  *returnedFn = &testProgressCallBackFn;
482227f654740f2a26ad62a5c155af9199af9e69b889claireho
482327f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode status = U_ZERO_ERROR;
482427f654740f2a26ad62a5c155af9199af9e69b889claireho        RegexMatcher matcher("x", 0, status);
482527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
482627f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
482727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
482827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedFn == NULL);
482927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedContext == NULL);
483027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
483127f654740f2a26ad62a5c155af9199af9e69b889claireho
483227f654740f2a26ad62a5c155af9199af9e69b889claireho   {
483327f654740f2a26ad62a5c155af9199af9e69b889claireho        // Set and Get work
483427f654740f2a26ad62a5c155af9199af9e69b889claireho        progressCallBackContext cbInfo = {this, 0, 0, 0};
483527f654740f2a26ad62a5c155af9199af9e69b889claireho        const void                  *returnedContext;
483627f654740f2a26ad62a5c155af9199af9e69b889claireho        URegexFindProgressCallback  *returnedFn;
483727f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode status = U_ZERO_ERROR;
483827f654740f2a26ad62a5c155af9199af9e69b889claireho        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
483927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
484027f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
484127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
484227f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
484327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
484427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedFn == testProgressCallBackFn);
484527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedContext == &cbInfo);
484627f654740f2a26ad62a5c155af9199af9e69b889claireho
484727f654740f2a26ad62a5c155af9199af9e69b889claireho        // A short-running match should NOT invoke the callback.
484827f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
484927f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(100);
485027f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s = "abxxx";
485127f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s);
485227f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
485327f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.setTrace(TRUE);
485427f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
485527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status));
485627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
485727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls == 0);
485827f654740f2a26ad62a5c155af9199af9e69b889claireho
485927f654740f2a26ad62a5c155af9199af9e69b889claireho        // A medium running match that causes matcher.find() to invoke our callback for each index.
486027f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
486127f654740f2a26ad62a5c155af9199af9e69b889claireho        s = "aaaaaaaaaaaaaaaaaaab";
486227f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s.length()); //  Some upper limit for number of calls that is greater than size of our input string
486327f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s);
486427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
486527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
486627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
486727f654740f2a26ad62a5c155af9199af9e69b889claireho
486827f654740f2a26ad62a5c155af9199af9e69b889claireho        // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
486927f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
487027f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
487127f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s1.length() - 5); //  Bail early somewhere near the end of input string
487227f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s1);
487327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
487427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
487527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
487627f654740f2a26ad62a5c155af9199af9e69b889claireho
487727f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
487827f654740f2a26ad62a5c155af9199af9e69b889claireho        // Now a match that will succeed, but after an interruption
487927f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
488027f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
488127f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s2.length() - 10); //  Bail early somewhere near the end of input string
488227f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s2);
488327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
488427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
488527f654740f2a26ad62a5c155af9199af9e69b889claireho        // Now retry the match from where left off
488627f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.maxCalls = 100; //  No callback limit
488727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
488827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
488927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
489027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
489127f654740f2a26ad62a5c155af9199af9e69b889claireho
489227f654740f2a26ad62a5c155af9199af9e69b889claireho
489327f654740f2a26ad62a5c155af9199af9e69b889claireho}
489427f654740f2a26ad62a5c155af9199af9e69b889claireho
489527f654740f2a26ad62a5c155af9199af9e69b889claireho
489650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
489750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
489850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    PreAllocatedUTextCAPI    Check the C API with pre-allocated mutable
489950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             UTexts. The pure-C implementation of UText
490050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             has no mutable backing stores, but we can
490150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             use UnicodeString here to test the functionality.
490250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
490350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
490450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PreAllocatedUTextCAPI () {
490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode           status = U_ZERO_ERROR;
490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    URegularExpression  *re;
490750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText                patternText = UTEXT_INITIALIZER;
490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString        buffer;
490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText                bufferText = UTEXT_INITIALIZER;
491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&bufferText, &buffer, &status);
491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
491350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
491450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  getText() and getUText()
491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  text1 = UTEXT_INITIALIZER;
491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  text2 = UTEXT_INITIALIZER;
491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar  text2Chars[20];
492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  *resultText;
492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
492327f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);
492427f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUChars(&text2, text2Chars, -1, &status);
492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
492827f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openUText(&patternText, 0, NULL, &status);
493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* First set a UText */
493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setUText(re, &text1, &status);
493350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
493750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text1, 0);
493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
494050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
494150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
494250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
494350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text1, 0);
494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
494750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Then set a UChar * */
494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2Chars, 7, &status);
494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
495250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
495350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text2, 0);
495450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&text1);
495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&text2);
495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
496050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
496150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  group()
496350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *actual;
496750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool    result;
496850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
497350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_find(re, 0, &status);
497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result==TRUE);
497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture Group 0, the full match.  Should succeed.  */
497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
498027f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
498150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
498250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
498327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture group #1.  Should succeed. */
498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
498727f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
498850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
498950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
499027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
499250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture group out of range.  Error. */
499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
499427f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
499650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
499950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
500350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  replaceFirst()
500450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
500550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text2[80];
500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText    replText = UTEXT_INITIALIZER;
500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *result;
501050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
501427f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("x(.*?)x", 0, NULL, &status);
501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Normal case, with match */
502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
502150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
502350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
502527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);
502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* No match.  Text should copy to output with no changes.  */
502850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2, -1, &status);
502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
503050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
503150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
503250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
503327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
503450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
503550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Unicode escapes */
503650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
503727f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
503850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
503950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
504050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
504227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);
504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replText);
504650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  replaceAll()
505150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text2[80];
505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText    replText = UTEXT_INITIALIZER;
505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *result;
505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
506127f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("x(.*?)x", 0, NULL, &status);
506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Normal case, with match */
506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
507050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
507227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result);
507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* No match.  Text should copy to output with no changes.  */
507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2, -1, &status);
507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
507750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
508027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
508150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
508250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
508350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replText);
508450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
508550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
508650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
508750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
508850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
508950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *   so we don't need to test it here.
509050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
509250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&bufferText);
509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
509450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
509550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
509650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------
509750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
509850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Bug7651   Regex pattern that exceeds default operator stack depth in matcher.
509950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
510050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------
510150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Bug7651() {
510250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)");
510350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData.
510427f654740f2a26ad62a5c155af9199af9e69b889claireho    //  It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation.
510550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)");
510650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString s("#ff @abcd This is test");
510750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern  *REPattern = NULL;
510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher  *REMatcher = NULL;
510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
511050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError pe;
511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(pattern1, 0, pe, status);
511350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REMatcher = REPattern->matcher(s, status);
511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->find());
511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->start(status) == 0);
511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(pattern2, 0, pe, status);
512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REMatcher = REPattern->matcher(s, status);
512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->find());
512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->start(status) == 0);
512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }
513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
513327f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::Bug7740() {
513427f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode status = U_ZERO_ERROR;
513527f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString pattern = "(a)";
513627f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString text = "abcdef";
513727f654740f2a26ad62a5c155af9199af9e69b889claireho    RegexMatcher *m = new RegexMatcher(pattern, text, 0, status);
513827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
513927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(m->lookingAt(status));
514027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
514127f654740f2a26ad62a5c155af9199af9e69b889claireho    status = U_ILLEGAL_ARGUMENT_ERROR;
514227f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString s = m->group(1, status);    // Bug 7740: segfault here.
514327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
514427f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(s == "");
514527f654740f2a26ad62a5c155af9199af9e69b889claireho    delete m;
514627f654740f2a26ad62a5c155af9199af9e69b889claireho}
514727f654740f2a26ad62a5c155af9199af9e69b889claireho
5148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 8479:  was crashing whith a Bogus UnicodeString as input.
5149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug8479() {
5151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UErrorCode status = U_ZERO_ERROR;
515227f654740f2a26ad62a5c155af9199af9e69b889claireho
5153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status);
5154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status))
5156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    {
5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString str;
5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        str.setToBogus();
5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pMatcher->reset(str);
5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ZERO_ERROR;
5161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pMatcher->matches(status);
5162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        delete pMatcher;
5164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
516627f654740f2a26ad62a5c155af9199af9e69b889claireho
516727f654740f2a26ad62a5c155af9199af9e69b889claireho
5168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 7029
5169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug7029() {
5170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UErrorCode status = U_ZERO_ERROR;
5171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);
5173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString text = "abc.def";
5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString splits[10];
5175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t numFields = pMatcher->split(text, splits, 10, status);
5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(numFields == 8);
5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pMatcher;
5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
5181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::CheckInvBufSize() {
5183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(inv_next>=INV_BUFSIZ) {
5184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n",
5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          __FILE__, INV_BUFSIZ, inv_next);
5186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  } else {
5187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);
5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
5189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
5190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
5192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5193