1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/********************************************************************
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT:
3103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Copyright (c) 2002-2012, International Business Machines Corporation and
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   regextst.cpp
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      ICU Regular Expressions test, part of intltest.
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     NOTE!!
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     PLEASE be careful about ASCII assumptions in this test.
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     This test is one of the worst repeat offenders.
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     If you have questions, contact someone on the ICU PMC
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     who has access to an EBCDIC system.
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "intltest.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uniset.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regextst.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
3727f654740f2a26ad62a5c155af9199af9e69b889claireho#include "cstring.h"
3827f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uinvchar.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define SUPPORT_MUTATING_INPUT_STRING   0
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  Test class boilerplate
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::RegexTest()
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexTest::~RegexTest()
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (exec) logln("TestSuite RegexTest: ");
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (index) {
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0: name = "Basic";
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Basic();
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 1: name = "API_Match";
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Match();
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 2: name = "API_Replace";
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Replace();
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 3: name = "API_Pattern";
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) API_Pattern();
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case 4:
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "Extended";
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Extended();
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            name = "skip";
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 5: name = "Errors";
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) Errors();
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 6: name = "PerlTests";
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (exec) PerlTests();
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case 7: name = "Callbacks";
90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if (exec) Callbacks();
91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            break;
9227f654740f2a26ad62a5c155af9199af9e69b889claireho        case 8: name = "FindProgressCallbacks";
9327f654740f2a26ad62a5c155af9199af9e69b889claireho            if (exec) FindProgressCallbacks();
9427f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
9527f654740f2a26ad62a5c155af9199af9e69b889claireho        case 9: name = "Bug 6149";
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             if (exec) Bug6149();
97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             break;
9827f654740f2a26ad62a5c155af9199af9e69b889claireho        case 10: name = "UTextBasic";
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) UTextBasic();
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10127f654740f2a26ad62a5c155af9199af9e69b889claireho        case 11: name = "API_Match_UTF8";
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Match_UTF8();
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10427f654740f2a26ad62a5c155af9199af9e69b889claireho        case 12: name = "API_Replace_UTF8";
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Replace_UTF8();
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
10727f654740f2a26ad62a5c155af9199af9e69b889claireho        case 13: name = "API_Pattern_UTF8";
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) API_Pattern_UTF8();
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
11027f654740f2a26ad62a5c155af9199af9e69b889claireho        case 14: name = "PerlTestsUTF8";
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) PerlTestsUTF8();
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
11327f654740f2a26ad62a5c155af9199af9e69b889claireho        case 15: name = "PreAllocatedUTextCAPI";
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          if (exec) PreAllocatedUTextCAPI();
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho          break;
11627f654740f2a26ad62a5c155af9199af9e69b889claireho        case 16: name = "Bug 7651";
11727f654740f2a26ad62a5c155af9199af9e69b889claireho             if (exec) Bug7651();
11827f654740f2a26ad62a5c155af9199af9e69b889claireho             break;
11927f654740f2a26ad62a5c155af9199af9e69b889claireho        case 17: name = "Bug 7740";
12027f654740f2a26ad62a5c155af9199af9e69b889claireho            if (exec) Bug7740();
12127f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 18: name = "Bug 8479";
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) Bug8479();
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 19: name = "Bug 7029";
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) Bug7029();
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        case 20: name = "CheckInvBufSize";
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (exec) CheckInvBufSize();
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case 21: name = "Bug 9283";
132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if (exec) Bug9283();
133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default: name = "";
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; //needed to end loop
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
14227f654740f2a26ad62a5c155af9199af9e69b889claireho/**
14327f654740f2a26ad62a5c155af9199af9e69b889claireho * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
14427f654740f2a26ad62a5c155af9199af9e69b889claireho * into ASCII.
14527f654740f2a26ad62a5c155af9199af9e69b889claireho * @see utext_openUTF8
14627f654740f2a26ad62a5c155af9199af9e69b889claireho */
14727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
14827f654740f2a26ad62a5c155af9199af9e69b889claireho
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Error Checking / Reporting macros used in all of the tests.
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) {
15627f654740f2a26ad62a5c155af9199af9e69b889claireho  int64_t oldIndex = utext_getNativeIndex(text);
15727f654740f2a26ad62a5c155af9199af9e69b889claireho  utext_setNativeIndex(text, 0);
15827f654740f2a26ad62a5c155af9199af9e69b889claireho  char *bufPtr = buf;
15927f654740f2a26ad62a5c155af9199af9e69b889claireho  UChar32 c = utext_next32From(text, 0);
16027f654740f2a26ad62a5c155af9199af9e69b889claireho  while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) {
16127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (0x000020<=c && c<0x00007e) {
16227f654740f2a26ad62a5c155af9199af9e69b889claireho      *bufPtr = c;
16327f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
16427f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
16527f654740f2a26ad62a5c155af9199af9e69b889claireho      sprintf(bufPtr,"U+%04X", c);
16627f654740f2a26ad62a5c155af9199af9e69b889claireho      bufPtr+= strlen(bufPtr)-1;
16727f654740f2a26ad62a5c155af9199af9e69b889claireho#else
16827f654740f2a26ad62a5c155af9199af9e69b889claireho      *bufPtr = '%';
16927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
17027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
17127f654740f2a26ad62a5c155af9199af9e69b889claireho    bufPtr++;
17227f654740f2a26ad62a5c155af9199af9e69b889claireho    c = UTEXT_NEXT32(text);
17327f654740f2a26ad62a5c155af9199af9e69b889claireho  }
17427f654740f2a26ad62a5c155af9199af9e69b889claireho  *bufPtr = 0;
17527f654740f2a26ad62a5c155af9199af9e69b889claireho#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
17627f654740f2a26ad62a5c155af9199af9e69b889claireho  char *ebuf = (char*)malloc(bufLen);
17727f654740f2a26ad62a5c155af9199af9e69b889claireho  uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen);
17827f654740f2a26ad62a5c155af9199af9e69b889claireho  uprv_strncpy(buf, ebuf, bufLen);
17927f654740f2a26ad62a5c155af9199af9e69b889claireho  free((void*)ebuf);
18027f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
18127f654740f2a26ad62a5c155af9199af9e69b889claireho  utext_setNativeIndex(text, oldIndex);
18227f654740f2a26ad62a5c155af9199af9e69b889claireho}
18327f654740f2a26ad62a5c155af9199af9e69b889claireho
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char ASSERT_BUF[1024];
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char* RegexTest::extractToAssertBuf(const UnicodeString& message) {
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(message.length()==0) {
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strcpy(ASSERT_BUF, "[[empty UnicodeString]]");
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  } else {
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString buf;
192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    IntlTest::prettify(message,buf);
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(buf.length()==0) {
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]");
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1);
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      if(ASSERT_BUF[0]==0) {
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ASSERT_BUF[0]=0;
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for(int32_t i=0;i<buf.length();i++) {
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          UChar ch = buf[i];
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho      }
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return ASSERT_BUF;
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
/** Logs a printable rendering of a UText, tagged with file, line and the argument's source text. */
#define REGEX_VERBOSE_TEXT(text) { \
    char buf[200]; \
    utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text); \
    logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf); \
}

/** If the in-scope `status` is a failure, reports it via dataerrln() and returns from the enclosing function. */
#define REGEX_CHECK_STATUS { \
    if (U_FAILURE(status)) { \
        dataerrln("%s:%d: RegexTest failure.  status=%s", \
                  __FILE__, __LINE__, u_errorName(status)); \
        return; \
    } \
}

/** Reports an error when `expr` compares equal to FALSE; execution continues afterwards. */
#define REGEX_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr); \
    } \
}

/**
 * Evaluates `expr` with a fresh local `status` in scope and reports an error
 * unless `status` ends up equal to `errcode`.
 */
#define REGEX_ASSERT_FAIL(expr, errcode) { \
    UErrorCode status=U_ZERO_ERROR; \
    (expr); \
    if (status!=errcode) { \
        dataerrln("RegexTest failure at line %d.  Expected status=%s, got %s", \
                  __LINE__, u_errorName(errcode), u_errorName(status)); \
    } \
}

/** Like REGEX_CHECK_STATUS, but also reports the caller-supplied `line` and does not return. */
#define REGEX_CHECK_STATUS_L(line) { \
    if (U_FAILURE(status)) { \
        errln("RegexTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); \
    } \
}

/** Like REGEX_ASSERT, but reports the caller-supplied `line` and returns from the enclosing function on failure. */
#define REGEX_ASSERT_L(expr, line) { \
    if ((expr)==FALSE) { \
        errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); \
        return; \
    } \
}

/** Reports an error when `ustr == inv` is false; the message includes `ustr` rendered through extractToAssertBuf(). */
#define REGEX_ASSERT_UNISTR(ustr,inv) { \
    if (!(ustr==inv)) { \
        errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv); \
    } \
}
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
231103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic UBool testUTextEqual(UText *uta, UText *utb) {
232103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UChar32 ca = 0;
233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UChar32 cb = 0;
234103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    utext_setNativeIndex(uta, 0);
235103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    utext_setNativeIndex(utb, 0);
236103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    do {
237103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        ca = utext_next32(uta);
238103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        cb = utext_next32(utb);
239103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if (ca != cb) {
240103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
242103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } while (ca != U_SENTINEL);
243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return ca == cb;
244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
24727f654740f2a26ad62a5c155af9199af9e69b889claireho/**
24827f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected expected text in UTF-8 (not platform) codepage
24927f654740f2a26ad62a5c155af9199af9e69b889claireho */
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) {
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText expectedText = UTEXT_INITIALIZER;
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&expectedText, expected, -1, &status);
25427f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(status)) {
25527f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
25627f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
25727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
25827f654740f2a26ad62a5c155af9199af9e69b889claireho    if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) {
25927f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUText:  expected is %d utf-8 bytes, but utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected));
26027f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
26127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_setNativeIndex(actual, 0);
263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (!testUTextEqual(&expectedText, actual)) {
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        char buf[201 /*21*/];
26527f654740f2a26ad62a5c155af9199af9e69b889claireho        char expectedBuf[201];
26627f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
26727f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
26827f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&expectedText);
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
27227f654740f2a26ad62a5c155af9199af9e69b889claireho/**
27327f654740f2a26ad62a5c155af9199af9e69b889claireho * @param expected invariant (platform local text) input
27427f654740f2a26ad62a5c155af9199af9e69b889claireho */
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27627f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) {
27727f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode status = U_ZERO_ERROR;
27827f654740f2a26ad62a5c155af9199af9e69b889claireho    UText expectedText = UTEXT_INITIALIZER;
27927f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status);
28027f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_FAILURE(status)) {
28127f654740f2a26ad62a5c155af9199af9e69b889claireho      errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
28227f654740f2a26ad62a5c155af9199af9e69b889claireho      return;
28327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
28427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_setNativeIndex(actual, 0);
285103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (!testUTextEqual(&expectedText, actual)) {
28627f654740f2a26ad62a5c155af9199af9e69b889claireho        char buf[201 /*21*/];
28727f654740f2a26ad62a5c155af9199af9e69b889claireho        char expectedBuf[201];
28827f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
28927f654740f2a26ad62a5c155af9199af9e69b889claireho        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
29027f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
29127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
29227f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_close(&expectedText);
29327f654740f2a26ad62a5c155af9199af9e69b889claireho}
29427f654740f2a26ad62a5c155af9199af9e69b889claireho
/**
 * Checks a UText against an expected string given as UTF-8, reporting the
 * invoking file and line on failure.
 */
#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) \
    assertUText((expected), (actual), __FILE__, __LINE__)

/**
 * Checks a UText against an expected string given as invariant characters,
 * reporting the invoking file and line on failure.
 */
#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) \
    assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This buffer ( inv_buf ) is used to hold the UTF-8 strings
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * passed into utext_openUTF8. An error will be given if
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * INV_BUFSIZ is too small.  It's only used on EBCDIC systems.
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define INV_BUFSIZ 2048 /* increase this if too small */
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
31254dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic int64_t inv_next=0;
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY!=U_ASCII_FAMILY
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic char inv_buf[INV_BUFSIZ];
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) {
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(length==-1) length=strlen(inv);
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if U_CHARSET_FAMILY==U_ASCII_FAMILY
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  inv_next+=length;
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return utext_openUTF8(ut, inv, length, status);
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#else
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(inv_next+length+1>INV_BUFSIZ) {
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n",
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1));
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    *status = U_MEMORY_ALLOCATION_ERROR;
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return NULL;
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  unsigned char *buf = (unsigned char*)inv_buf+inv_next;
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  uprv_aestrncpy(buf, (const uint8_t*)inv, length);
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  inv_next+=length;
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if 0
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next);
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  return utext_openUTF8(ut, (const char*)buf, length, status);
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    REGEX_TESTLM       Macro + invocation function to simplify writing quick tests
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                       for the LookingAt() and  Match() functions.
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       usage:
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          REGEX_TESTLM("pattern",  "input text",  lookingAt expected, matches expected);
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          The expected results are UBool - TRUE or FALSE.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          The input text is unescaped.  The pattern is not.
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Runs the same lookingAt/matches check twice: once through doRegexLMTest()
// and once through doRegexLMTestUTF8().
#define REGEX_TESTLM(pat, text, looking, match) { \
    doRegexLMTest(pat, text, looking, match, __LINE__); \
    doRegexLMTestUTF8(pat, text, looking, match, __LINE__); \
}
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UnicodeString pattern(pat, -1, US_INV);
362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UnicodeString inputText(text, -1, US_INV);
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status  = U_ZERO_ERROR;
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *REPattern = NULL;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher        *REMatcher = NULL;
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool               retVal     = TRUE;
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString patString(pat, -1, US_INV);
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REPattern = RegexPattern::compile(patString, 0, pe, status);
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
3726d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("RegexTest failure in RegexPattern::compile() at line %d.  Status = %s",
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (line==376) { RegexPatternDump(REPattern);}
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString inputString(inputText);
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString unEscapedInput = inputString.unescape();
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REMatcher = REPattern->matcher(unEscapedInput, status);
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in REPattern::matcher() at line %d.  Status = %s\n",
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool actualmatch;
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    actualmatch = REMatcher->lookingAt(status);
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in lookingAt() at line %d.  Status = %s\n",
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal =  FALSE;
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (actualmatch != looking) {
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    actualmatch = REMatcher->matches(status);
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest failure in matches() at line %d.  Status = %s\n",
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            line, u_errorName(status));
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (actualmatch != match) {
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("RegexTest: wrong return from matches() at line %d.\n", line);
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retVal = FALSE;
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (retVal == FALSE) {
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPatternDump(REPattern);
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete REPattern;
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete REMatcher;
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retVal;
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               pattern    = UTEXT_INITIALIZER;
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             inputUTF8Length;
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char                *textChars = NULL;
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               inputText  = UTEXT_INITIALIZER;
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status     = U_ZERO_ERROR;
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *REPattern = NULL;
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *REMatcher = NULL;
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               retVal     = TRUE;
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43227f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(&pattern, 0, pe, status);
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8).  Status = %s\n",
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString inputString(text, -1, US_INV);
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString unEscapedInput = inputString.unescape();
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // UTF-8 does not allow unpaired surrogates, so this could actually happen
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        logln("RegexTest unable to convert input to UTF8 at line %d.  Status = %s\n", line, u_errorName(status));
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE; // not a failure of the Regex engine
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR; // buffer overflow
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    textChars = new char[inputUTF8Length+1];
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REMatcher = &REPattern->matcher(status)->reset(&inputText);
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in REPattern::matcher() at line %d (UTF8).  Status = %s\n",
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool actualmatch;
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    actualmatch = REMatcher->lookingAt(status);
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in lookingAt() at line %d (UTF8).  Status = %s\n",
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal =  FALSE;
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (actualmatch != looking) {
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line);
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    actualmatch = REMatcher->matches(status);
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest failure in matches() at line %d (UTF8).  Status = %s\n",
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, u_errorName(status));
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (actualmatch != match) {
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line);
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = FALSE;
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (retVal == FALSE) {
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPatternDump(REPattern);
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&pattern);
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] textChars;
49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retVal;
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
//    REGEX_ERR       Macro + invocation function to simplify writing
//                       regex tests for incorrect patterns
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       usage:
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          REGEX_ERR("pattern",   expected error line, column, expected status);
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
// Forwards to regex_err(), appending the invocation's source line as the last argument.
// The expansion itself ends with a semicolon.
#define REGEX_ERR(pat, errLine, errCol, expectedStatus) regex_err(pat, errLine, errCol, expectedStatus, __LINE__);
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          UErrorCode expectedStatus, int32_t line) {
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       pattern(pat);
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status         = U_ZERO_ERROR;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *callerPattern = NULL;
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Compile the caller's pattern
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString patString(pat);
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    callerPattern = RegexPattern::compile(patString, 0, pe, status);
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status != expectedStatus) {
5266d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (status != U_ZERO_ERROR) {
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (pe.line != errLine || pe.offset != errCol) {
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    line, errLine, errCol, pe.line, pe.offset);
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete callerPattern;
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Compile again, using a UTF-8-based UText
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText patternText = UTEXT_INITIALIZER;
54227f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&patternText, pat, -1, &status);
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callerPattern = RegexPattern::compile(&patternText, 0, pe, status);
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status != expectedStatus) {
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status != U_ZERO_ERROR) {
54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (pe.line != errLine || pe.offset != errCol) {
54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    line, errLine, errCol, pe.line, pe.offset);
55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete callerPattern;
55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      Basic      Check for basic functionality of regex pattern matching.
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                 Avoid the use of REGEX_FIND test macro, which has
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                 substantial dependencies on basic Regex functionality.
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::Basic() {
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debug - slide failing test cases early
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError pe;
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode  status = U_ZERO_ERROR;
579103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        RegexPattern *pattern;
580103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
581103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        RegexPatternDump(pattern);
582103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
583103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        UBool result = m->find();
584103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        printf("result = %d\n", result);
585103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX====================");
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exit(1);
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Pattern with parentheses
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE,  FALSE);
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)ring", "stabcring",       TRUE,  TRUE);
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)ring", "stabcrung",       FALSE, FALSE);
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Patterns with *
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a*", "",  TRUE, TRUE);
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a*", "b", TRUE, FALSE);
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Patterns with "."
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".", "abc", TRUE, FALSE);
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("...", "abc", TRUE, TRUE);
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("....", "abc", FALSE, FALSE);
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Patterns with * applied to chars at end of literal string
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Supplemental chars match as single chars, not a pair of surrogates.
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  UnicodeSets in the pattern
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurences.
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   OR operator in patterns
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a|b", "b", TRUE, TRUE);
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  +
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("b+", "", FALSE, FALSE);
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   ?
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Escape sequences that become single literal chars, handled internally
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   by ICU's Unescape.
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE);      // Octal     TODO: not implemented yet.
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE);        // BEL
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE);       // Control-L
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE);        // Escape
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE);        // Form Feed
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE);        // new line
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE);        //  CR
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE);        // Tab
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE);
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE);
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE);  //  \A matches only at the beginning of input
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE);  //  \A matches only at the beginning of input
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Escape of special chars in patterns
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE);
71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    UTextBasic   Check for quirks that are specific to the UText
71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                 implementation.
72050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
72250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::UTextBasic() {
    // Purpose: build a RegexMatcher from a UText pattern, hand it a UText input,
    // then reset the matcher with the UText it already holds as its input, and
    // verify after each step that the matcher's input text still reads "abc".
    // Takes no parameters and returns nothing; outcomes are reported through the
    // REGEX_* macros, which are defined outside this function.

    // "abc" spelled as explicit code-unit values rather than a string literal;
    // presumably so the bytes handed to utext_openUTF8() do not depend on the
    // compiler's execution charset (see the ASCII/EBCDIC note at the top of the file).
72327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;

    // Pattern: a stack-allocated UText opened over str_abc. The -1 length is
    // understood to mean "NUL-terminated" -- see the ICU utext_openUTF8 docs.
72550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText pattern = UTEXT_INITIALIZER;
72627f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&pattern, str_abc, -1, &status);
    // Construct the matcher from the UText pattern with flags == 0. The status of
    // the open call above is not checked separately; the single check below
    // covers both the open and the construction.
72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher matcher(&pattern, 0, status);
    // NOTE(review): REGEX_CHECK_STATUS is defined elsewhere in this file;
    // presumably it reports a failure and returns when status is an error. If it
    // does return early, the utext_close() calls at the bottom are skipped --
    // confirm against the macro definition.
72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Input: a second UText over the same "abc" bytes.
73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText input = UTEXT_INITIALIZER;
73127f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&input, str_abc, -1, &status);
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
    // Give the matcher its input. status is not passed to this reset() overload,
    // so the check that follows re-examines the status left by the earlier calls.
73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher.reset(&input);
73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
    // The matcher's input text should read back as "abc" (macro defined
    // elsewhere; presumably compares the UTF-8 bytes with the UText contents).
73527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Reset the matcher using the very UText it returns from inputText(), then
    // confirm the input text is still "abc" afterwards.
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher.reset(matcher.inputText());
73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
73927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Close the two UTexts opened above.
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&pattern);
74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&input);
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Match   Test that the API for class RegexMatcher
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  is present and nominally working, but excluding functions
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  implementing replace operations.
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Match() {
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status=U_ZERO_ERROR;
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             flags = 0;
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Debug - slide failing test cases early
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return;
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Simple pattern compilation
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("abc");
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern        *pat2;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat2 = RegexPattern::compile(re, flags, pe, status);
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString inStr1 = "abcdef this is a test";
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString instr2 = "not abc";
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString empty  = "";
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Matcher creation and reset.
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *m1 = pat2->matcher(inStr1, status);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == instr2);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(inStr1);
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(empty);
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == empty);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(&m1->pattern() == pat2);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  reset(pos, status)
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(inStr1);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(4, status);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->input() == inStr1);
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(-1, status);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(0, status);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t len = m1->input().length();
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(len-1, status);
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(len, status);
82227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
82327f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
82427f654740f2a26ad62a5c155af9199af9e69b889claireho
82527f654740f2a26ad62a5c155af9199af9e69b889claireho        m1->reset(len+1, status);
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // match(pos, status)
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(4, status) == TRUE);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset();
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(3, status) == FALSE);
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset();
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(5, status) == FALSE);
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(4, status) == TRUE);
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Match() at end of string should fail, but should not
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  be an error.
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = m1->input().length();
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(len, status) == FALSE);
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Match beyond end of string should fail with an error.
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->matches(len+1, status) == FALSE);
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Successful match at end of string.
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            RegexMatcher m("A?", 0, status);  // will match zero length string.
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            m.reset(inStr1);
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len = inStr1.length();
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.matches(len, status) == TRUE);
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            m.reset(empty);
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.matches(0, status) == TRUE);
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // lookingAt(pos, status)
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m1->reset(instr2);  // "not abc"
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = m1->input().length();
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(len, status) == FALSE);
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE);
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete m1;
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat2;
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Capture Group.
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::start();
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::end();
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     RegexMatcher::groupCount();
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("01(23(45)67)(.*)");
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = "0123456789";
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const int32_t matchStarts[] = {0,  2, 4, 8};
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const int32_t matchEnds[]   = {10, 8, 6, 10};
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<4; i++) {
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t actualStart = matcher->start(i, status);
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (actualStart != matchStarts[i]) {
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("RegexTest failure at line %d, index %d.  Expected %d, got %d\n",
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    __LINE__, i, matchStarts[i], actualStart);
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t actualEnd = matcher->end(i, status);
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_CHECK_STATUS;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (actualEnd != matchEnds[i]) {
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("RegexTest failure at line %d index %d.  Expected %d, got %d\n",
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    __LINE__, i, matchEnds[i], actualEnd);
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->lookingAt(status);
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(status)    == "0123456789");
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(0, status) == "0123456789");
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(1, status) == "234567"    );
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(2, status) == "45"        );
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->group(3, status) == "89"        );
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  find
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString       re("abc");
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = ".abc..abc...abc..";
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                    012345678901234567
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 6);
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 12);
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find() == FALSE);
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find() == FALSE);
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset();
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(0, status));
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(1, status));
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 1);
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(2, status));
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 6);
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(12, status));
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 12);
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(13, status) == FALSE);
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(16, status) == FALSE);
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find(17, status) == FALSE);
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->groupCount() == 0);
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  find, with \G in pattern (true if at the end of a previous match).
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t             flags=0;
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError         pe;
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString       re(".*?(?:(\\Gabc)|(abc))", -1, US_INV);
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString data = ".abcabc.abc..";
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                    012345678901234567
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *matcher = pat->matcher(data, status);
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 0);
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(1, status) == -1);
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(2, status) == 1);
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->find());
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(status) == 4);
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(1, status) == 4);
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(matcher->start(2, status) == -1);
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete matcher;
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pat;
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   find with zero length matches, match position should bump ahead
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     to prevent loops.
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t                 i;
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher        m("(?= ?)", 0, status);   // This pattern will produce zero-length matches anywhere,
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      //   using an always-true look-ahead.
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s("    ");
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i++) {
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == i);
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==5);
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check that the bump goes over surrogate pairs OK
1061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004");
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s = s.unescape();
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i+=2) {
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == i);
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==10);
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // find() loop breaking test.
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //        with pattern of /.?/, should see a series of one char matches, then a single
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //        match of zero length at the end of the input string.
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t                 i;
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          status=U_ZERO_ERROR;
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher        m(".?", 0, status);
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s("    ");
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; ; i++) {
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (m.find() == FALSE) {
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.start(status) == i);
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(i==5);
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Matchers with no input string behave as if they had an empty input string.
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  m(".?", 0, status);
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.find());
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.start(status) == 0);
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m.input() == "");
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern  *p = RegexPattern::compile(".", 0, status);
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  *m = p->matcher(status);
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m->find() == FALSE);
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(m->input() == "");
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete m;
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete p;
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Regions
1120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("This is test data");
1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m(".*", testString,  0, status);
1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == testString.length());
1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.region(2,4, status);
1132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.matches(status));
1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.start(status)==2);
1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.end(status)==4);
1136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.reset();
1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == testString.length());
1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString shorterString("short");
1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        m.reset(shorterString);
1144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionStart() == 0);
1145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.regionEnd() == shorterString.length());
1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(&m == &m.reset());
1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // hitEnd() and requireEnd()
1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aabb");
1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m1(".*", testString,  0, status);
1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.hitEnd() == TRUE);
1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m1.requireEnd() == FALSE);
1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m2("a*", testString, 0, status);
1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.hitEnd() == FALSE);
1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m2.requireEnd() == FALSE);
1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher m3(".*$", testString, 0, status);
1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.hitEnd() == TRUE);
1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(m3.requireEnd() == TRUE);
1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compilation error on reset with UChar *
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   These were a hazard that people were stumbling over with runtime errors.
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Changed them to compiler errors by adding private methods that more closely
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   matched the incorrect use of the functions.
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar ucharString[20];
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m(".", 0, status);
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(ucharString);  // should not compile.
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern *p = RegexPattern::compile(".", 0, status);
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher *m2 = p->matcher(ucharString, status);    //  should not compile.
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m3(".", ucharString, 0, status);  //  Should not compile
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //  Time Outs.
1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //       Note:  These tests will need to be changed when the regexp engine is
1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //              able to detect and cut short the exponential time behavior on
1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //              this type of match.
1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //    Enough 'a's in the string to cause the match to time out.
1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //       (Each additional 'a' doubles the time)
1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");
1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(a+)+b", testString, 0, status);
1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getTimeLimit() == 0);
1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setTimeLimit(100, status);
1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getTimeLimit() == 100);
1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_TIME_OUT);
1237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   Few enough 'a's to slip in under the time limit.
1241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString("aaaaaaaaaaaaaaaaaa");
1242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(a+)+b", testString, 0, status);
1243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setTimeLimit(100, status);
1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //  Stack Limits
1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString testString(1000000, 0x41, 1000000);  // Length 1,000,000, filled with 'A'
1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   of the '+', and makes the stack frames larger.
1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("(A)+A$", testString, 0, status);
1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With the default stack, this match should fail to run
1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With unlimited stack, it should run
1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(0, status);
1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == TRUE);
1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 0);
1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // With a limited stack, the match should fail
1273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(10000, status);
1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 10000);
1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A pattern that doesn't save state should work with
1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   a minimal sized stack
1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    {
1283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString testString = "abc";
1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("abc", testString, 0, status);
1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(30, status);
1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status) == TRUE);
1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 30);
1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Negative stack sizes should fail
1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(1000, status);
1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setStackLimit(-1, status);
1298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.getStackLimit() == 1000);
1300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Replace        API test for class RegexMatcher, testing the
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                         Replace family of functions.
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Replace() {
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Replace
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             flags=0;
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError         pe;
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode          status=U_ZERO_ERROR;
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       re("abc");
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString data = ".abc..abc...abc..";
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                    012345678901234567
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher = pat->matcher(data, status);
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Plain vanilla matches.
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString  dest;
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".yz..abc...abc..");
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".yz..yz...yz..");
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Plain vanilla non-matches.
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d2 = ".abx..abx...abx..";
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d2);
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".abx..abx...abx..");
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == ".abx..abx...abx..");
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty source string
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d3 = "";
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d3);
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("yz", status);
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "");
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("yz", status);
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "");
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty substitution string
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(data);              // ".abc..abc...abc.."
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("", status);
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "...abc...abc..");
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("", status);
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "........");
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // match whole string
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d4 = "abc";
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher->reset(d4);
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceFirst("xyz", status);
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "xyz");
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher->replaceAll("xyz", status);
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "xyz");
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Capture Group, simple case
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString       re2("a(..)");
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString d5 = "abcdefg";
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher2 = pat2->matcher(d5, status);
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst("$1$1", status);
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "bcbcdefg");
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status);
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "The value of $1 is bc.defg");
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    replacement = replacement.unescape();
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dest = matcher2->replaceFirst(replacement, status);
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg");
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR);
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Replacement String with \u hex escapes
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  src = "abc 1 abc 2 abc 3";
1429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\u0043--");
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset(src);
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  result = matcher->replaceAll(substitute, status);
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3");
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  src = "abc !";
1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\U00010000--");
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matcher->reset(src);
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString  result = matcher->replaceAll(substitute, status);
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString expected = UnicodeString("--");
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expected.append((UChar32)0x10000);
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expected.append("-- !");
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == expected);
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO:  need more thorough testing of capture substitutions.
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Bug 4057
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin";
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher m("ss(.*?)ee", 0, status);
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString result;
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Multiple finds do NOT bump up the previous appendReplacement position.
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(s);
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // After a reset into the interior of a string, appendReplacement still starts at beginning.
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.truncate(0);
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset(10, status);
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // find() at interior of string, appendReplacement still starts at beginning.
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.truncate(0);
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.reset();
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find(10, status);
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.find();
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendReplacement(result, "ooh", status);
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        m.appendTail(result);
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin");
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete matcher2;
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat2;
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete matcher;
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat;
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      API_Pattern       Test that the API for class RegexPattern is
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                        present and nominally working.
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexTest::API_Pattern() {
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        pata;    // Test default constructor to not crash.
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        patb;
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pata == patb);
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pata == pata);
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString re1("abc[a-l][m-z]");
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString re2("def");
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode    status = U_ZERO_ERROR;
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError   pe;
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat1 = RegexPattern::compile(re1, 0, pe, status);
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat2 = RegexPattern::compile(re2, 0, pe, status);
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1 == *pat1);
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1 != pata);
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assign
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    patb = *pat1;
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == *pat1);
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy Construct
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern patc(*pat1);
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patc == *pat1);
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == patc);
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1 != pat2);
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    patb = *pat2;
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb != patc);
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(patb == *pat2);
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compile with no flags.
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern         *pat1a = RegexPattern::compile(re1, pe, status);
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1a == *pat1);
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1a->flags() == 0);
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compile with different flags should be not equal
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1b != *pat1a);
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1a->flags() == 0);
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1b;
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // clone
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat1c = pat1->clone();
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1c == *pat1);
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(*pat1c != *pat2);
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1c;
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1a;
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat2;
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Verify that a matcher created from a cloned pattern works.
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     (Jitterbug 3423)
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode     status     = U_ZERO_ERROR;
1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexPattern  *pSource    = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexPattern  *pClone     = pSource->clone();
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete         pSource;
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        RegexMatcher  *mFromClone = pClone->matcher(status);
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_CHECK_STATUS;
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = "Hello World";
1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mFromClone->reset(s);
1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == TRUE);
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->group(status) == "Hello");
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == TRUE);
1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->group(status) == "World");
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        REGEX_ASSERT(mFromClone->find() == FALSE);
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete mFromClone;
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete pClone;
1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   matches convenience API
1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_INDEX_OUTOFBOUNDS_ERROR;
1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Split()
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile(" +",  pe, status);
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString  fields[10];
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n;
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 10, status);
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==4);
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now");
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="is");
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="the");
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="time");
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="");
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 2, status);
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==2);
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now");
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="is the time");
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[1] = "*";
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("Now is the time", fields, 1, status);
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==1);
1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="Now is the time");
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="*");
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("    Now       is the time   ", fields, 10, status);
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==6);
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="Now");
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="is");
1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="the");
1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="time");
1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="");
1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("     ", fields, 10, status);
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1646b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==2);
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[0] = "foo";
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("", fields, 10, status);
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==0);
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="foo");
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  split, with a pattern with (capture)
1659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"),  pe, status);
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1665b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="");
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="c");
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="");
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(status==U_ZERO_ERROR);
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1677b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="c");
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="");
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[6] = "foo";
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 6, status);
1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==6);
1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1696b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[5]=="");  // All text following "<c>" field delimiter.
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[6]=="foo");
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[5] = "foo";
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time<c>");
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="foo");
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fields[5] = "foo";
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="b");
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="the time");
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[5]=="foo");
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==4);
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="  ");
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="a");
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="Now is ");
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]=="the time<c>");
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("([-,])",  pe, status);
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n = pat1->split("1-10,20", fields, 10, status);
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(n==5);
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[0]=="1");
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[1]=="-");
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[2]=="10");
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[3]==",");
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(fields[4]=="20");
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Test split of string with empty trailing fields
1747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    pat1 = RegexPattern::compile(",", pe, status);
1748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1749b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("a,b,c,", fields, 10, status);
1750b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==4);
1752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="a");
1753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="b");
1754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="c");
1755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="");
1756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("a,,,", fields, 10, status);
1758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==4);
1760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="a");
1761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
1762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="");
1763b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="");
1764b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pat1;
1765b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Split Separator with zero length match.
1767b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    pat1 = RegexPattern::compile(":?", pe, status);
1768b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("abc", fields, 10, status);
1770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
1771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==5);
1772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[0]=="");
1773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="a");
1774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="b");
1775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[3]=="c");
1776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[4]=="");
1777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pat1;
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // RegexPattern::pattern()
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = new RegexPattern();
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->pattern() == "");
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->pattern() == "(Hello, world)*");
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // classID functions
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_CHECK_STATUS;
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID());
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() != NULL);
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString Hello("Hello, world.");
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *m = pat1->matcher(Hello, status);
1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID());
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID());
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REGEX_ASSERT(m->getDynamicClassID() != NULL);
1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete m;
1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat1;
1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Match_UTF8   Test that the UText-based API of class RegexMatcher,
181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       exercised here on UTF-8 input, is present and working,
181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       excluding functions implementing replace operations.
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
181750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Match_UTF8() {
181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status=U_ZERO_ERROR;
182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             flags = 0;
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Debug - move failing test cases into the block below to run them first
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
182550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0
182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return;
182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Simple pattern compilation
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re = UTEXT_INITIALIZER;
183627f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
1837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        REGEX_VERBOSE_TEXT(&re);
183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern        *pat2;
183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pat2 = RegexPattern::compile(&re, flags, pe, status);
184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input1 = UTEXT_INITIALIZER;
184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input2 = UTEXT_INITIALIZER;
184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty  = UTEXT_INITIALIZER;
184527f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status);
184627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&input1);
184727f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
184827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&input2);
184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUChars(&empty, NULL, 0, &status);
185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
185127f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
185250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t input2Len = strlen("not abc");
1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
185550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
185650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Matcher creation and reset.
185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
1858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);
185950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
186050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
186127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */
186227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);
186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
186527f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */
186627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText());
186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input1);
186827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
186950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&empty);
187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_nativeLength(&empty) == 0);
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  reset(pos, status)
187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input1);
187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(4, status);
187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
188027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(-1, status);
188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(0, status);
188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(input1Len-1, status);
189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(input1Len, status);
189627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
189727f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
189827f654740f2a26ad62a5c155af9199af9e69b889claireho
189927f654740f2a26ad62a5c155af9199af9e69b889claireho        m1->reset(input1Len+1, status);
190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // matches(pos, status)
1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);
190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(4, status) == TRUE);
190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset();
190950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(3, status) == FALSE);
191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset();
191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(5, status) == FALSE);
191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(4, status) == TRUE);
191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // matches() at end of string should fail, but should not
191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  be an error.
191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(input2Len, status) == FALSE);
192050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match beyond end of string should fail with an error.
192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE);
192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Successful match at end of string.
192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            RegexMatcher m("A?", 0, status);  // will match zero length string.
193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            m.reset(&input1);
193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.matches(input1Len, status) == TRUE);
193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            m.reset(&empty);
193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.matches(0, status) == TRUE);
193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // lookingAt(pos, status)
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m1->reset(&input2);  // "not abc"
194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);
195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete m1;
195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat2;
196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
196150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input1);
196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input2);
196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Capture Group.
197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::start();
197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::end();
197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     RegexMatcher::groupCount();
197350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
197450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
197927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
198027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_01234567_pat, -1, &status);
198150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
198250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
198350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
198627f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
198727f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_0123456789, -1, &status);
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        static const int32_t matchStarts[] = {0,  2, 4, 8};
199350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        static const int32_t matchEnds[]   = {10, 8, 6, 10};
199450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t i;
199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; i<4; i++) {
199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t actualStart = matcher->start(i, status);
199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (actualStart != matchStarts[i]) {
199927f654740f2a26ad62a5c155af9199af9e69b889claireho                errln("RegexTest failure at %s:%d, index %d.  Expected %d, got %d\n",
200027f654740f2a26ad62a5c155af9199af9e69b889claireho                      __FILE__, __LINE__, i, matchStarts[i], actualStart);
200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t actualEnd = matcher->end(i, status);
200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS;
200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (actualEnd != matchEnds[i]) {
200527f654740f2a26ad62a5c155af9199af9e69b889claireho                errln("RegexTest failure at %s:%d index %d.  Expected %d, got %d\n",
200627f654740f2a26ad62a5c155af9199af9e69b889claireho                      __FILE__, __LINE__, i, matchEnds[i], actualEnd);
200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
201150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
2017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
201850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->lookingAt(status);
201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
202050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString dest;
202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText destText = UTEXT_INITIALIZER;
202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&destText, &dest, &status);
202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText *result;
202427f654740f2a26ad62a5c155af9199af9e69b889claireho        //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
202527f654740f2a26ad62a5c155af9199af9e69b889claireho        //	Test shallow-clone API
202627f654740f2a26ad62a5c155af9199af9e69b889claireho        int64_t   group_len;
202727f654740f2a26ad62a5c155af9199af9e69b889claireho        result = matcher->group((UText *)NULL, group_len, status);
202850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
202927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
203127f654740f2a26ad62a5c155af9199af9e69b889claireho        result = matcher->group(0, &destText, group_len, status);
203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
203427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
203527f654740f2a26ad62a5c155af9199af9e69b889claireho        //  destText is now immutable, reopen it
203627f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_close(&destText);
203727f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUnicodeString(&destText, &dest, &status);
203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(0, NULL, status);
204050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
204127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(0, &destText, status);
204450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
204627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(1, NULL, status);
204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
205027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
205127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(1, &destText, status);
205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
205550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
205627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(2, NULL, status);
205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
206027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
206127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(2, &destText, status);
206450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
206627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(3, NULL, status);
206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
207027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
207127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->group(3, &destText, status);
207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
207627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
2077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
2082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&destText);
208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  find
2093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
209927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
210027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_abc, -1, &status);
2101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
210527f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
210627f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abcabcabc, -1, &status);
210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                      012345678901234567
2108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
211350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 6);
211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 12);
211750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find() == FALSE);
211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find() == FALSE);
2119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset();
212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
2123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(0, status));
212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(1, status));
212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 1);
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(2, status));
212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 6);
213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(12, status));
213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 12);
213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(13, status) == FALSE);
213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(16, status) == FALSE);
213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find(17, status) == FALSE);
213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
2136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
2141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->groupCount() == 0);
214350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
214450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
214850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
2149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
215350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  find, with \G in pattern (true if at the end of a previous match).
2154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t             flags=0;
215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError         pe;
215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText               re=UTEXT_INITIALIZER;
216027f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */
216127f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_Gabcabc, -1, &status);
2162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
216450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText input = UTEXT_INITIALIZER;
216727f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
216827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abcabcabc, -1, &status);
216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                      012345678901234567
2170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
217450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 0);
217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(1, status) == -1);
217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(2, status) == 1);
2177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->find());
217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(status) == 4);
218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(1, status) == 4);
218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(matcher->start(2, status) == -1);
218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
218350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
218450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete matcher;
218550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat;
218650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
218850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&re);
2189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   find with zero length matches, match position should bump ahead
219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     to prevent loops.
2194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
219550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t                 i;
219750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
219850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher        m("(?= ?)", 0, status);   // This pattern will produce zero-length matches anywhere,
219950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                      //   using an always-true look-ahead.
220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText s = UTEXT_INITIALIZER;
220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, "    ", -1, &status);
220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; ; i++) {
220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
220750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
220850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
220950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == i);
221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(i==5);
221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check that the bump goes over characters outside the BMP OK
221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in UTF-8
221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00};
221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, (char *)aboveBMP, -1, &status);
221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
221827f654740f2a26ad62a5c155af9199af9e69b889claireho        for (i=0; ; i+=4) {
221950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
222250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
222350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == i);
2224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
222527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(i==20);
222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&s);
222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // find() loop breaking test.
223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //        with pattern of /.?/, should see a series of one char matches, then a single
223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //        match of zero length at the end of the input string.
223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t                 i;
223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode          status=U_ZERO_ERROR;
223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher        m(".?", 0, status);
223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText s = UTEXT_INITIALIZER;
223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&s, "    ", -1, &status);
223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&s);
224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (i=0; ; i++) {
224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (m.find() == FALSE) {
224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.start(status) == i);
224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(i==5);
224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&s);
2250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Matchers with no input string behave as if they had an empty input string.
2255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //
2256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  m(".?", 0, status);
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.find());
226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.start(status) == 0);
226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.input() == "");
2264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
226750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *p = RegexPattern::compile(".", 0, status);
226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  *m = p->matcher(status);
226950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
2270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m->find() == FALSE);
227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);
227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete m;
227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete p;
2275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Regions
227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testPattern = UTEXT_INITIALIZER;
228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testText    = UTEXT_INITIALIZER;
228427f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);
228527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testPattern);
228627f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
228727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testText);
228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m(&testPattern, &testText, 0, status);
229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.region(2,4, status);
229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.matches(status));
229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.start(status)==2);
230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.end(status)==4);
230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
230250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset();
230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230727f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
230827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_VERBOSE_TEXT(&testText);
230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&testText);
231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionStart() == 0);
231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
2329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
233150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(&m == &m.reset());
233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testText);
233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testPattern);
2337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // hitEnd() and requireEnd()
234150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testPattern = UTEXT_INITIALIZER;
234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText testText    = UTEXT_INITIALIZER;
234627f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */
234727f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
234827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_, -1, &status);
234927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testText, str_aabb, -1, &status);
235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m1(&testPattern, &testText,  0, status);
235250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.hitEnd() == TRUE);
235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m1.requireEnd() == FALSE);
235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
235827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
235927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_a, -1, &status);
236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m2(&testPattern, &testText, 0, status);
236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
236250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.hitEnd() == FALSE);
236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m2.requireEnd() == FALSE);
236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
2365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
236727f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */
236827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);
236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m3(&testPattern, &testText, 0, status);
237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.hitEnd() == TRUE);
237250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(m3.requireEnd() == TRUE);
237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testText);
237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&testPattern);
2377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
2378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
2379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------
2382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Replace_UTF8   API test for class RegexMatcher, testing the
238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                         Replace family of functions.
2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Replace_UTF8() {
238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Replace
239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             flags=0;
239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status=U_ZERO_ERROR;
2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               re=UTEXT_INITIALIZER;
239627f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
239727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&re);
239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240127f654740f2a26ad62a5c155af9199af9e69b889claireho    char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
240250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //             012345678901234567
240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText dataText = UTEXT_INITIALIZER;
240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, data, -1, &status);
240527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
240627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&dataText);
2407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);
2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Plain vanilla matches.
241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
241250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString  dest;
241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText destText = UTEXT_INITIALIZER;
241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&destText, &dest, &status);
241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText *result;
241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replText = UTEXT_INITIALIZER;
241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
242027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_yz, -1, &status);
242127f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_VERBOSE_TEXT(&replText);
242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
242427f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */
242527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
242850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
243027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
243427f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */
243527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
244150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
244227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Plain vanilla non-matches.
244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
244727f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
244827f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
244950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
245050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
245150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
245250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
245327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
245450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
245550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
245650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
245750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
245827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
245950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
246050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
246150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
246227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
246350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
246450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
246550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
246650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
246750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
246827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
246950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Empty source string
247250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, NULL, 0, &status);
247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
247750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
247827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
248327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
248650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
248727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
249227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", result);
249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Empty substitution string
249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&replText, NULL, 0, &status);
250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
250327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */
250427f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
250927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
251327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */
251427f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
252027f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // match whole string
252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
252527f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
252627f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abc, -1, &status);
252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->reset(&dataText);
252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
252927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */
253027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_xyz, -1, &status);
253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, NULL, status);
253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
253327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceFirst(&replText, &destText, status);
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
253927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, NULL, status);
254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
254327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher->replaceAll(&replText, &destText, status);
254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
254927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Capture Group, simple case
255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
255427f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */
255527f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re, str_add, -1, &status);
255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);
255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
255827f654740f2a26ad62a5c155af9199af9e69b889claireho
255927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */
256027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&dataText, str_abcdefg, -1, &status);
2561b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
256427f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
256527f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_11, -1, &status);
256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
256827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* bcbcdefg */
256927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
257527f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
2576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
2577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
2578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    utext_openUTF8(&replText, str_v, -1, &status);
2579b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_VERBOSE_TEXT(&replText);
258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
258227f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */
258327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
258927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
259127f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
259227f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
259527f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */
259627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
260227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
260427f654740f2a26ad62a5c155af9199af9e69b889claireho    unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */
260527f654740f2a26ad62a5c155af9199af9e69b889claireho    //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE
260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //                                 012345678901234567890123456
260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[22] = 0xF0;
260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[23] = 0x9D;
260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[24] = 0x9F;
261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    supplDigitChars[25] = 0x8F;
261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, NULL, status);
261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
261527f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
261627f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
261850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    result = matcher2->replaceFirst(&replText, &destText, status);
262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
262227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
262327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e,  0x00 }; /* bad capture group number $5..." */
262427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);
262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR);
262627f654740f2a26ad62a5c155af9199af9e69b889claireho//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(result);
262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR);
263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(result == &destText);
263127f654740f2a26ad62a5c155af9199af9e69b889claireho//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Replacement String with \u hex escapes
263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
263727f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */
263827f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */
263927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
264027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_u0043, -1, &status);
264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset(&dataText);
264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, NULL, status);
264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
264527f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
264627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, &destText, status);
265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
265227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
265527f654740f2a26ad62a5c155af9199af9e69b889claireho      const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */
265627f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_abc, -1, &status);
265727f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */
265827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_U00010000, -1, &status);
265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->reset(&dataText);
266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
266127f654740f2a26ad62a5c155af9199af9e69b889claireho        unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //                          0123456789
266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[2] = 0xF0;
266450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[3] = 0x90;
266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[4] = 0x80;
266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expected[5] = 0x80;
266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, NULL, status);
266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
267027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
267150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(result);
267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
267350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = matcher->replaceAll(&replText, &destText, status);
267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &destText);
267627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
267850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // TODO:  need more thorough testing of capture substitutions.
2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
268050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Bug 4057
268150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
268250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
268427f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */
268527f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */
268627f654740f2a26ad62a5c155af9199af9e69b889clairehoconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
268727f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&re, str_ssee, -1, &status);
268827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&dataText, str_blah, -1, &status);
268927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&replText, str_ooh, -1, &status);
269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher m(&re, 0, status);
269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString result;
269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText resultText = UTEXT_INITIALIZER;
269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Multiple finds do NOT bump up the previous appendReplacement position.
269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(&dataText);
270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
270427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
270527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText);
2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // After a reset into the interior of a string, appendReplacement still starts at beginning.
270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.truncate(0);
271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset(10, status);
271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
271627f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
271727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText);
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // find() at interior of string, appendReplacement still starts at beginning.
272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.truncate(0);
272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &result, &status);
272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.reset();
272450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find(10, status);
272550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.find();
272650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        m.appendReplacement(&resultText, &replText, status);
272750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
272827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
272927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);
2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
273127f654740f2a26ad62a5c155af9199af9e69b889claireho        m.appendTail(&resultText, status);
273227f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
273327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&resultText);
273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher2;
273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat2;
274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher;
274150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat;
274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&dataText);
274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replText);
274550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&destText);
274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re);
2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      API_Pattern_UTF8  Test that the API for class RegexPattern is
275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                        present and nominally working.
275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
275650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::API_Pattern_UTF8() {
275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        pata;    // Test default constructor to not crash.
275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        patb;
275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pata == patb);
276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pata == pata);
276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText         re1 = UTEXT_INITIALIZER;
276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText         re2 = UTEXT_INITIALIZER;
276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode    status = U_ZERO_ERROR;
276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError   pe;
276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276827f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
276927f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
277027f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re1, str_abcalmz, -1, &status);
277127f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re2, str_def, -1, &status);
277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat1 = RegexPattern::compile(&re1, 0, pe, status);
277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat2 = RegexPattern::compile(&re2, 0, pe, status);
277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1 == *pat1);
277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1 != pata);
277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Assign
278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patb = *pat1;
278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == *pat1);
278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Copy Construct
278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern patc(*pat1);
278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patc == *pat1);
278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == patc);
278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1 != pat2);
278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patb = *pat2;
278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb != patc);
279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(patb == *pat2);
279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compile with no flags.
279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern         *pat1a = RegexPattern::compile(&re1, pe, status);
279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1a == *pat1);
279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1a->flags() == 0);
279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compile with different flags should be not equal
279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status);
280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1b != *pat1a);
280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1a->flags() == 0);
280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1b;
280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // clone
280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat1c = pat1->clone();
280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1c == *pat1);
281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(*pat1c != *pat2);
281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1c;
281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1a;
281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat2;
281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re1);
281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re2);
281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Verify that a matcher created from a cloned pattern works.
282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     (Jitterbug 3423)
282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode     status     = U_ZERO_ERROR;
282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText          pattern    = UTEXT_INITIALIZER;
282827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
282927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_pL, -1, &status);
283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *pSource    = RegexPattern::compile(&pattern, 0, status);
283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern  *pClone     = pSource->clone();
283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete         pSource;
283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher  *mFromClone = pClone->matcher(status);
283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText          input      = UTEXT_INITIALIZER;
283827f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
283927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_HelloWorld, -1, &status);
284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        mFromClone->reset(&input);
284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == TRUE);
284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->group(status) == "Hello");
284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == TRUE);
284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->group(status) == "World");
284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(mFromClone->find() == FALSE);
284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete mFromClone;
284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pClone;
284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&pattern);
285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matches convenience API
285550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
285650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status  = U_ZERO_ERROR;
285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText      pattern = UTEXT_INITIALIZER;
285950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText      input   = UTEXT_INITIALIZER;
286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
286127f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
286227f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_randominput, -1, &status);
286350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
286427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */
286527f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_dotstar, -1, &status);
286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
286850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
286927f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
287027f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_abc, -1, &status);
287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
287427f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
287527f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_nput, -1, &status);
287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
287927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_randominput, -1, &status);
288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288327f654740f2a26ad62a5c155af9199af9e69b889claireho        const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */
288427f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_u, -1, &status);
288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288827f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&input, str_abc, -1, &status);
288927f654740f2a26ad62a5c155af9199af9e69b889claireho        utext_openUTF8(&pattern, str_abc, -1, &status);
289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&input);
289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&pattern);
289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Split()
290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
290327f654740f2a26ad62a5c155af9199af9e69b889claireho    const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /*  + */
290427f654740f2a26ad62a5c155af9199af9e69b889claireho    utext_openUTF8(&re1, str_spaceplus, -1, &status);
290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString  fields[10];
290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t n;
291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 10, status);
291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==4);
291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now");
291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="is");
291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="the");
291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="time");
291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="");
291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 2, status);
292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==2);
292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now");
292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="is the time");
292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[1] = "*";
292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("Now is the time", fields, 1, status);
292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==1);
293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="Now is the time");
293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="*");
293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("    Now       is the time   ", fields, 10, status);
293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2937b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==6);
293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="Now");
294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="is");
294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="the");
294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="time");
294350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="");
2944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[6]=="");
294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[2] = "*";
294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("     ", fields, 10, status);
294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==2);
295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
2951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[1]=="");
2952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[2]=="*");
295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[0] = "foo";
295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("", fields, 10, status);
295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==0);
295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="foo");
295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  split, with a pattern with (capture)
296327f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status);
296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1,  pe, status);
296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
2968b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[6] = fields[7] = "*";
296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="");
297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="c");
297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="");
2979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[7]=="*");
298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(status==U_ZERO_ERROR);
298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2982b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fields[6] = fields[7] = "*";
298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
2985b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(n==7);
298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="c");
299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="");
2993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[7]=="*");
299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[6] = "foo";
2997b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    n = pat1->split("  <a>Now is <b>the time<c> ", fields, 6, status);
299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==6);
300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
3005b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(fields[5]==" ");
300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[6]=="foo");
300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[5] = "foo";
301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time<c>");
301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="foo");
301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fields[5] = "foo";
302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="b");
302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="the time");
303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[5]=="foo");
303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==4);
303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="  ");
303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="a");
303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="Now is ");
303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]=="the time<c>");
304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
304327f654740f2a26ad62a5c155af9199af9e69b889claireho    regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status);
304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    n = pat1->split("1-10,20", fields, 10, status);
304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(n==5);
304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[0]=="1");
305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[1]=="-");
305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[2]=="10");
305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[3]==",");
305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(fields[4]=="20");
305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // RegexPattern::pattern() and patternText()
305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = new RegexPattern();
306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(pat1->pattern() == "");
306227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));
306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
3064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const char *helloWorldInvariant = "(Hello, world)*";
3065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat1 = RegexPattern::compile(&re1, pe, status);
306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
3068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
306927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat1;
307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&re1);
307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      Extended       A more thorough check for features of regex patterns
307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     The test cases are in a separate data file,
//                       source/test/testdata/regextst.txt
308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     A description of the test data format is included in that file.
308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
308550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst char *
308650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexTest::getPath(char buffer[2048], const char *filename) {
308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status=U_ZERO_ERROR;
308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *testDataDirectory = IntlTest::getSourceTestData(status);
308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("ERROR: loadTestData() failed - %s", u_errorName(status));
309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    strcpy(buffer, testDataDirectory);
309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    strcat(buffer, filename);
309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return buffer;
309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Extended() {
310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char tdd[2048];
310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *srcPath;
310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status  = U_ZERO_ERROR;
310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     lineNum = 0;
310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open and read the test data file.
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    srcPath=getPath(tdd, "regextst.txt");
310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcPath==NULL) {
311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t    len;
311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Put the test data into a UnicodeString
312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString testString(FALSE, testData, len);
312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
3126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    RegexMatcher    flagsMat      (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMQvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   testPattern;   // The pattern for test from the test file.
313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   testFlags;     // the flags   for a test.
313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString   matchString;   // The marked up string to be used as input
313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)){
313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Construct RegexMatcher() error.");
313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete [] testData;
313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
313950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Loop over the test data file, once per line.
314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (lineMat.find()) {
314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lineNum++;
314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
314527f654740f2a26ad62a5c155af9199af9e69b889claireho          errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status));
314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString testLine = lineMat.group(1, status);
315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (testLine.length() == 0) {
315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Parse the test line.  Skip blank and comment only lines.
315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Separate out the three main fields - pattern, flags, target.
315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        commentMat.reset(testLine);
316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (commentMat.lookingAt(status)) {
316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This line is a comment, or blank.
316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the pattern field, remove it from the test file line.
316750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        quotedStuffMat.reset(testLine);
316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (quotedStuffMat.lookingAt(status)) {
317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testPattern = quotedStuffMat.group(2, status);
317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testLine.remove(0, quotedStuffMat.end(0, status));
317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
317327f654740f2a26ad62a5c155af9199af9e69b889claireho            errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum);
317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the flags from the test file line.
318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagsMat.reset(testLine);
318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagsMat.lookingAt(status);                  // Will always match, possibly an empty string.
318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testFlags = flagsMat.group(1, status);
318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagsMat.group(2, status).length() > 0) {
318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Bad Match flag at line %d. Scanning %c\n",
318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                lineNum, flagsMat.group(2, status).charAt(0));
318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testLine.remove(0, flagsMat.end(0, status));
319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Pull out the match string, as a whole.
319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    We'll process the <tags> later.
319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        quotedStuffMat.reset(testLine);
319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (quotedStuffMat.lookingAt(status)) {
319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            matchString = quotedStuffMat.group(2, status);
319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            testLine.remove(0, quotedStuffMat.end(0, status));
319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Bad match string at test file line %d", lineNum);
320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  The only thing left from the input line should be an optional trailing comment.
320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        commentMat.reset(testLine);
320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (commentMat.lookingAt(status) == FALSE) {
320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Line %d: unexpected characters at end of test line.", lineNum);
321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Run the test
321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
321627f654740f2a26ad62a5c155af9199af9e69b889claireho        regex_find(testPattern, testFlags, matchString, srcPath, lineNum);
321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] testData;
322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
//---------------------------------------------------------------------------
//
//    regex_find(pattern, flags, inputString, srcPath, line)
//
//         Function to run a single test from the Extended (data driven) tests.
//         See file test/testdata/regextst.txt for a description of the
//         pattern and inputString fields, and the allowed flags.
//         srcPath is the path of the test data file; it is used in log and
//         error messages.
//         line is the source line in regextst.txt of the test.
//
//---------------------------------------------------------------------------
323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Set a value into a UVector at position specified by a decimal number in
323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   a UnicodeString.   This is a utility function needed by the actual test function,
323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   which follows.
324050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic void set(UVector &vec, int32_t val, UnicodeString index) {
324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status=U_ZERO_ERROR;
324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  idx = 0;
324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (int32_t i=0; i<index.length(); i++) {
324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t d=u_charDigitValue(index.charAt(i));
324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (d<0) {return;}
324650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        idx = idx*10 + d;
324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (vec.size()<idx+1) {vec.addElement(-1, status);}
324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    vec.setElementAt(val, idx);
325050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
325227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void setInt(UVector &vec, int32_t val, int32_t idx) {
325327f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode  status=U_ZERO_ERROR;
325427f654740f2a26ad62a5c155af9199af9e69b889claireho    while (vec.size()<idx+1) {vec.addElement(-1, status);}
325527f654740f2a26ad62a5c155af9199af9e69b889claireho    vec.setElementAt(val, idx);
325627f654740f2a26ad62a5c155af9199af9e69b889claireho}
325727f654740f2a26ad62a5c155af9199af9e69b889claireho
325827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex)
325927f654740f2a26ad62a5c155af9199af9e69b889claireho{
326027f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool couldFind = TRUE;
326127f654740f2a26ad62a5c155af9199af9e69b889claireho    UTEXT_SETNATIVEINDEX(utext, 0);
326227f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t i = 0;
326327f654740f2a26ad62a5c155af9199af9e69b889claireho    while (i < unistrOffset) {
326427f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 c = UTEXT_NEXT32(utext);
326527f654740f2a26ad62a5c155af9199af9e69b889claireho        if (c != U_SENTINEL) {
326627f654740f2a26ad62a5c155af9199af9e69b889claireho            i += U16_LENGTH(c);
326727f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
326827f654740f2a26ad62a5c155af9199af9e69b889claireho            couldFind = FALSE;
326927f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
327027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
327127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
3272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext);
327327f654740f2a26ad62a5c155af9199af9e69b889claireho    return couldFind;
327427f654740f2a26ad62a5c155af9199af9e69b889claireho}
327527f654740f2a26ad62a5c155af9199af9e69b889claireho
327627f654740f2a26ad62a5c155af9199af9e69b889claireho
327750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::regex_find(const UnicodeString &pattern,
327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           const UnicodeString &flags,
327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           const UnicodeString &inputString,
328027f654740f2a26ad62a5c155af9199af9e69b889claireho                           const char *srcPath,
328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           int32_t line) {
328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString       unEscapedInput;
328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString       deTaggedInput;
328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             patternUTF8Length,      inputUTF8Length;
328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char                *patternChars  = NULL, *inputChars = NULL;
328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               patternText    = UTEXT_INITIALIZER;
328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText               inputText      = UTEXT_INITIALIZER;
328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UConverter          *UTF8Converter = NULL;
329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode          status         = U_ZERO_ERROR;
329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError         pe;
329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *parsePat      = NULL;
329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *parseMatcher  = NULL;
329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern        *callerPattern = NULL, *UTF8Pattern = NULL;
329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher        *matcher       = NULL, *UTF8Matcher = NULL;
329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             groupStarts(status);
329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             groupEnds(status);
329927f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector             groupStartsUTF8(status);
330027f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector             groupEndsUTF8(status);
330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               isMatch        = FALSE, isUTF8Match = FALSE;
330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               failed         = FALSE;
330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             numFinds;
330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             i;
330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               useMatchesFunc   = FALSE;
330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool               useLookingAtFunc = FALSE;
330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             regionStart      = -1;
330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t             regionEnd        = -1;
330927f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t             regionStartUTF8  = -1;
331027f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t             regionEndUTF8    = -1;
331127f654740f2a26ad62a5c155af9199af9e69b889claireho
331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Compile the caller's pattern
331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t bflags = 0;
331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x69) >= 0)  { // 'i' flag
331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_CASE_INSENSITIVE;
331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x78) >= 0)  { // 'x' flag
332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_COMMENTS;
332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x73) >= 0)  { // 's' flag
332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_DOTALL;
332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x6d) >= 0)  { // 'm' flag
332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_MULTILINE;
332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bflags |= UREGEX_UNIX_LINES;
333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
3336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag
3337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        bflags |= UREGEX_LITERAL;
3338103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callerPattern = RegexPattern::compile(pattern, bflags, pe, status);
334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status != U_ZERO_ERROR) {
334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #if UCONFIG_NO_BREAK_ITERATION==1
334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // 'v' test flag means that the test pattern should not compile if ICU was configured
334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #endif
335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Expected pattern compilation error.
335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                logln("Pattern Compile returns \"%s\"", u_errorName(status));
335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Unexpected pattern compilation error.
3358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status));
335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;
336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTF8Converter = ucnv_open("UTF8", &status);
336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR; // buffer overflow
336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    patternChars = new char[patternUTF8Length+1];
336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status == U_ZERO_ERROR) {
337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status != U_ZERO_ERROR) {
337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==1
337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // 'v' test flag means that the test pattern should not compile if ICU was configured
337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Expected pattern compilation error.
338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status));
338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Unexpected pattern compilation error.
339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: error %s compiling pattern. (UTF8)", line, u_errorName(status));
339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTF8Pattern == NULL) {
339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
339927f654740f2a26ad62a5c155af9199af9e69b889claireho        logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x64) >= 0) {  // 'd' flag
340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPatternDump(callerPattern);
340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x45) >= 0) {  // 'E' flag
340827f654740f2a26ad62a5c155af9199af9e69b889claireho        errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line);
340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Number of times find() should be called on the test string, default to 1
341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    numFinds = 1;
341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=2; i<=9; i++) {
341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flags.indexOf((UChar)(0x30 + i)) >= 0) {   // digit flag
341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (numFinds != 1) {
342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: more than one digit flag.  Scanning %d.", line, i);
342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto cleanupAndReturn;
342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            numFinds = i;
342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // 'M' flag.  Use matches() instead of find()
342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x4d) >= 0) {
342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        useMatchesFunc = TRUE;
343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x4c) >= 0) {
343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        useLookingAtFunc = TRUE;
343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Find the tags in the input data, remove them, and record the group boundary
343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    positions.
343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status);
344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    unEscapedInput = inputString.unescape();
344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parseMatcher = parsePat->matcher(unEscapedInput, status);
344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(parseMatcher->find()) {
344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        parseMatcher->appendReplacement(deTaggedInput, "", status);
344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString groupNum = parseMatcher->group(2, status);
344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (groupNum == "r") {
345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // <r> or </r>, a region specification within the string
345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (parseMatcher->group(1, status) == "/") {
345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                regionEnd = deTaggedInput.length();
345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                regionStart = deTaggedInput.length();
345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // <digits> or </digits>, a group match boundary tag.
345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (parseMatcher->group(1, status) == "/") {
345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                set(groupEnds, deTaggedInput.length(), groupNum);
346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                set(groupStarts, deTaggedInput.length(), groupNum);
346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    parseMatcher->appendTail(deTaggedInput);
346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      errln("mismatched <r> tags");
346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      failed = TRUE;
347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      goto cleanupAndReturn;
347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Configure the matcher according to the flags specified with this test.
347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher = callerPattern->matcher(deTaggedInput, status);
347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->setTrace(TRUE);
348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTF8Pattern != NULL) {
348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR; // buffer overflow
348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputChars = new char[inputUTF8Length+1];
348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status);
348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);
348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_ZERO_ERROR) {
3490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_CHECK_STATUS_L(line);
349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher == NULL) {
349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
349627f654740f2a26ad62a5c155af9199af9e69b889claireho          logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
350127f654740f2a26ad62a5c155af9199af9e69b889claireho    //
350227f654740f2a26ad62a5c155af9199af9e69b889claireho    //  Generate native indices for UTF8 versions of region and capture group info
350327f654740f2a26ad62a5c155af9199af9e69b889claireho    //
350427f654740f2a26ad62a5c155af9199af9e69b889claireho    if (UTF8Matcher != NULL) {
350527f654740f2a26ad62a5c155af9199af9e69b889claireho        if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
350627f654740f2a26ad62a5c155af9199af9e69b889claireho        if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
350727f654740f2a26ad62a5c155af9199af9e69b889claireho
350827f654740f2a26ad62a5c155af9199af9e69b889claireho        //  Fill out the native index UVector info.
350927f654740f2a26ad62a5c155af9199af9e69b889claireho        //  Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
351027f654740f2a26ad62a5c155af9199af9e69b889claireho        for (i=0; i<groupStarts.size(); i++) {
351127f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t  start = groupStarts.elementAti(i);
351227f654740f2a26ad62a5c155af9199af9e69b889claireho            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
351327f654740f2a26ad62a5c155af9199af9e69b889claireho            if (start >= 0) {
351427f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t  startUTF8;
351527f654740f2a26ad62a5c155af9199af9e69b889claireho                if (!utextOffsetToNative(&inputText, start, startUTF8)) {
351627f654740f2a26ad62a5c155af9199af9e69b889claireho                    errln("Error at line %d: could not find native index for group start %d.  UTF16 index %d", line, i, start);
351727f654740f2a26ad62a5c155af9199af9e69b889claireho                    failed = TRUE;
351827f654740f2a26ad62a5c155af9199af9e69b889claireho                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
351927f654740f2a26ad62a5c155af9199af9e69b889claireho                }
352027f654740f2a26ad62a5c155af9199af9e69b889claireho                setInt(groupStartsUTF8, startUTF8, i);
352127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
352227f654740f2a26ad62a5c155af9199af9e69b889claireho
352327f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t  end = groupEnds.elementAti(i);
352427f654740f2a26ad62a5c155af9199af9e69b889claireho            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
352527f654740f2a26ad62a5c155af9199af9e69b889claireho            if (end >= 0) {
352627f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t  endUTF8;
352727f654740f2a26ad62a5c155af9199af9e69b889claireho                if (!utextOffsetToNative(&inputText, end, endUTF8)) {
352827f654740f2a26ad62a5c155af9199af9e69b889claireho                    errln("Error at line %d: could not find native index for group end %d.  UTF16 index %d", line, i, end);
352927f654740f2a26ad62a5c155af9199af9e69b889claireho                    failed = TRUE;
353027f654740f2a26ad62a5c155af9199af9e69b889claireho                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
353127f654740f2a26ad62a5c155af9199af9e69b889claireho                }
353227f654740f2a26ad62a5c155af9199af9e69b889claireho                setInt(groupEndsUTF8, endUTF8, i);
353327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
353427f654740f2a26ad62a5c155af9199af9e69b889claireho        }
353527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
353627f654740f2a26ad62a5c155af9199af9e69b889claireho
353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (regionStart>=0) {
353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       matcher->region(regionStart, regionEnd, status);
353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       REGEX_CHECK_STATUS_L(line);
354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       if (UTF8Matcher != NULL) {
354127f654740f2a26ad62a5c155af9199af9e69b889claireho           UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status);
354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           REGEX_CHECK_STATUS_L(line);
354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       }
354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x61) >= 0) {   //  'a' anchoring bounds flag
354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->useAnchoringBounds(FALSE);
354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher != NULL) {
354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTF8Matcher->useAnchoringBounds(FALSE);
354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x62) >= 0) {   //  'b' transparent bounds flag
355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->useTransparentBounds(TRUE);
355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTF8Matcher != NULL) {
355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTF8Matcher->useTransparentBounds(TRUE);
355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Do a find on the de-tagged input using the caller's pattern
356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     TODO: error on count>1 and not find().
356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //           error on both matches() and lookingAt().
356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<numFinds; i++) {
356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (useMatchesFunc) {
356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->matches(status);
356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               isUTF8Match = UTF8Matcher->matches(status);
357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else  if (useLookingAtFunc) {
357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->lookingAt(status);
357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isUTF8Match = UTF8Matcher->lookingAt(status);
357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = matcher->find();
357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTF8Matcher != NULL) {
357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isUTF8Match = UTF8Matcher->find();
358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    matcher->setTrace(FALSE);
358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Match up the groups from the find() with the groups from the tags
358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // number of tags should match number of groups from find operation.
359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // matcher->groupCount does not include group 0, the entire match, hence the +1.
359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   G option in test means that capture group data is not available in the
359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     expected results, so the check needs to be suppressed.
359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (isMatch == FALSE && groupStarts.size() != 0) {
3594b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        dataerrln("Error at line %d:  Match expected, but none found.", line);
359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) {
359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d:  Match expected, but none found. (UTF8)", line);
359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (flags.indexOf((UChar)0x47 /*G*/) >= 0) {
360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Only check for match / no match.  Don't check capture groups.
360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (isMatch && groupStarts.size() == 0) {
360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d:  No match expected, but one found.", line);
360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) {
360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d:  No match expected, but one found. (UTF8)", line);
361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanupAndReturn;
361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS_L(line);
361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<=matcher->groupCount(); i++) {
361750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t  expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i));
361827f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t  expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i));
361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (matcher->start(i, status) != expectedStart) {
362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d",
362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                line, i, expectedStart, matcher->start(i, status));
362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
362427f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {
362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d (UTF8)",
362627f654740f2a26ad62a5c155af9199af9e69b889claireho                  line, i, expectedStartUTF8, UTF8Matcher->start(i, status));
362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
363050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t  expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
363227f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t  expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (matcher->end(i, status) != expectedEnd) {
363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d",
363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                line, i, expectedEnd, matcher->end(i, status));
363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Error on end position;  keep going; real error is probably yet to come as group
363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   end positions work from end of the input data towards the front.
363927f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {
364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d (UTF8)",
364127f654740f2a26ad62a5c155af9199af9e69b889claireho                  line, i, expectedEndUTF8, UTF8Matcher->end(i, status));
364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            failed = TRUE;
364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Error on end position;  keep going; real error is probably yet to come as group
364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   end positions work from end of the input data towards the front.
364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ( matcher->groupCount()+1 < groupStarts.size()) {
364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: Expected %d capture groups, found %d.",
364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            line, groupStarts.size()-1, matcher->groupCount());
365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) {
365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: Expected %d capture groups, found %d. (UTF8)",
365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              line, groupStarts.size()-1, UTF8Matcher->groupCount());
365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->requireEnd() == TRUE) {
366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE", line);
366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Matcher->requireEnd() == TRUE) {
366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE (UTF8)", line);
366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->requireEnd() == FALSE) {
367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) &&   //  'Y' flag:  RequireEnd() == false
367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTF8Matcher->requireEnd() == FALSE) {
367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE (UTF8)", line);
367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->hitEnd() == TRUE) {
368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTF8Matcher->hitEnd() == TRUE) {
368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE (UTF8)", line);
368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matcher->hitEnd() == FALSE) {
369050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTF8Matcher->hitEnd() == FALSE) {
369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE (UTF8)", line);
369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        failed = TRUE;
369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
369950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanupAndReturn:
370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (failed) {
370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        infoln((UnicodeString)"\""+pattern+(UnicodeString)"\"  "
370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            +flags+(UnicodeString)"  \""+inputString+(UnicodeString)"\"");
370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // callerPattern->dump();
370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete parseMatcher;
370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete parsePat;
370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete UTF8Matcher;
370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete UTF8Pattern;
370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher;
371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete callerPattern;
371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] inputChars;
371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] patternChars;
371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_close(UTF8Converter);
371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
371850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//      Errors     Check for error handling in patterns.
372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
372750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Errors() {
372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // \escape sequences that aren't implemented yet.
372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Missing close parentheses
373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN);
373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN);
373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN);
373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Extra close paren
373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN);
373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN);
373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN);
374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Look-ahead, Look-behind
374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  TODO:  add tests for unbounded length look-behinds.
374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX);       // illegal construct
374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Attempt to use non-default flags
374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UParseError   pe;
374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode    status = U_ZERO_ERROR;
374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t       flags  = UREGEX_CANON_EQ |
375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UREGEX_COMMENTS         | UREGEX_DOTALL   |
375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UREGEX_MULTILINE;
375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status);
375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED);
375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete pat1;
375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Quantifiers are allowed only after something that can be quantified.
375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX);
376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);
376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);
376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Mal-formed {min,max} quantifiers
376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL);
376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN);
376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL);
376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);
376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);
376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);
377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG);        // Overflows int during scan
377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG);          // Overflows regex binary format
377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);
377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Ticket 5389
377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);
377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Invalid Back Reference \0
377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    For ICU 3.8 and earlier
377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    For ICU versions newer than 3.8, \0 introduces an octal escape.
378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);
378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Read a text data file, convert it to UChars, and return the data
378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    in one big UChar * buffer, which the caller must delete.
379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
379250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                     const char *defEncoding, UErrorCode &status) {
379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar       *retPtr  = NULL;
379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char        *fileBuf = NULL;
379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UConverter* conv     = NULL;
379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FILE        *f       = NULL;
379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ulen = 0;
380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return retPtr;
380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open the file.
380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    f = fopen(fileName, "rb");
380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (f == 0) {
380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("Error opening test data file %s\n", fileName);
381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_FILE_ACCESS_ERROR;
381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Read it in
381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t            fileSize;
381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t            amt_read;
381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek( f, 0, SEEK_END);
382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileSize = ftell(f);
382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileBuf = new char[fileSize];
382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fseek(f, 0, SEEK_SET);
382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    amt_read = fread(fileBuf, 1, fileSize, f);
382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (amt_read != fileSize || fileSize <= 0) {
382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("Error reading test data file.");
382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanUpAndReturn;
382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Look for a Unicode Signature (BOM) on the data just read
383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t        signatureLength;
383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *   fileBufC;
383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char*    encoding;
383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fileBufC = fileBuf;
383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    encoding = ucnv_detectUnicodeSignature(
383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBuf, fileSize, &signatureLength, &status);
383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(encoding!=NULL ){
384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBufC  += signatureLength;
384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileSize  -= signatureLength;
384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        encoding = defEncoding;
384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (strcmp(encoding, "utf-8") == 0) {
384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("file %s is missing its BOM", fileName);
384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Open a converter to take the rule file to UTF-16
385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    conv = ucnv_open(encoding, &status);
385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        goto cleanUpAndReturn;
385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Convert the rules to UChar.
385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Preflight first to determine required buffer size.
386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ulen = ucnv_toUChars(conv,
386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        NULL,           //  dest,
386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        0,              //  destCapacity,
386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileBufC,
386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fileSize,
386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        &status);
386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (status == U_BUFFER_OVERFLOW_ERROR) {
386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Buffer Overflow is expected from the preflight operation.
386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
387150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retPtr = new UChar[ulen+1];
387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ucnv_toUChars(conv,
387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            retPtr,       //  dest,
387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ulen+1,
387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fileBufC,
387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fileSize,
387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            &status);
387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
388050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehocleanUpAndReturn:
388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fclose(f);
388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete[] fileBuf;
388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_close(conv);
388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
3886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        delete []retPtr;
388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retPtr = 0;
388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ulen   = 0;
388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retPtr;
389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   PerlTests  - Run Perl's regular expression tests
389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                The input file for this test is re_tests, the standard regular
389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                expression test data distributed with the Perl source code.
389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                Here is Perl's description of the test data file:
390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # The tests are in a separate file 't/op/re_tests'.
390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Each line in that file is a separate test.
390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # There are five columns, separated by tabs.
390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 1 contains the pattern, optionally enclosed in C<''>.
390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Modifiers can be put after the closing C<'>.
390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 2 contains the string to be matched.
391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 3 contains the expected result:
391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     y   expect a match
391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     n   expect no match
391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #     c   expect an error
391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # B   test exposes a known bug in Perl, should be skipped
391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # b   test exposes a known bug in Perl, should be skipped if noamp
391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 4 contains a string, usually C<$&>.
392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 5 contains the expected result of double-quote
392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # interpolating that string after the match, or start of error message.
392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # Column 6, if present, contains a reason why the test is skipped.
392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # This is printed with "skipped", for harness to pick up.
392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # \n in the tests are interpolated, as are variables of the form ${\w+}.
392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        #
393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # If you want to add a regular expression test that can't be expressed
393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//        # in this format, don't add it here: put it in op/pat.t instead.
393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
//        For ICU, if field 3 contains an 'i', the test will be skipped.
//        The test exposes some known incompatibility between ICU and Perl regexps.
//        (The i is in addition to whatever was there before.)
393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------
393850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTests() {
393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char tdd[2048];
394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *srcPath;
394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode  status = U_ZERO_ERROR;
394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError pe;
394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Open and read the test data file.
394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    srcPath=getPath(tdd, "re_tests.txt");
394850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcPath==NULL) {
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t    len;
395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return; /* something went wrong, error already output */
395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Put the test data into a UnicodeString
396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString testDataString(FALSE, testData, len);
396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to break the input file into lines, and strip the new lines.
396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //     One line per match, capture group one is the desired data.
396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("RegexPattern::compile() error");
397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to split a test file line into fields.
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    There are six fields, separated by tabs.
397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Regex to identify test patterns with flag settings, and to separate them.
398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    Test patterns with flags look like 'pattern'i
398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    Test patterns without flags are not quoted:   pattern
398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher* flagMat = flagPat->matcher(status);
398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The Perl tests reference several perl-isms, which are evaluated/substituted
399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   in the test data.  Not being perl, this must be done explicitly.  Here
399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   are string constants and REs for these constructs.
399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString nulnulSrc("${nulnul}");
399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    nulnul = nulnul.unescape();
399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString ffffSrc("${ffff}");
399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString ffff("\\uffff", -1, US_INV);
400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ffff = ffff.unescape();
400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  regexp for $-[0], $+[2], etc.
400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher *groupsMat = groupsPat->matcher(status);
400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  regexp for $0, $1, $2, etc.
400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher *cgMat = cgPat->matcher(status);
400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Main Loop for the Perl Tests, runs once per line from the
401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   test data file.
401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  lineNum = 0;
401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t  skippedUnimplementedCount = 0;
401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (lineMat->find()) {
401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lineNum++;
401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Get a line, break it into its fields, do the Perl
402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    variable substitutions.
402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString line = lineMat->group(1, status);
402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString fields[7];
402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fieldPat->split(line, fields, 7, status);
402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagMat->reset(fields[0]);
402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        flagMat->matches(status);
403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString pattern  = flagMat->group(2, status);
403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace("${bang}", "!");
403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pattern.findAndReplace(ffffSrc, ffff);
403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Identify patterns that include match flag settings,
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    split off the flags, remove the extra quotes.
403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString flagStr = flagMat->group(3, status);
404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return;
404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t flags = 0;
404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_m = 0x6d;
404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_x = 0x78;
404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar UChar_y = 0x79;
405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_i) != -1) {
405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_CASE_INSENSITIVE;
405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_m) != -1) {
405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_MULTILINE;
405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (flagStr.indexOf(UChar_x) != -1) {
405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            flags |= UREGEX_COMMENTS;
405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Compile the test pattern.
406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status);
406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_REGEX_UNIMPLEMENTED) {
406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //
406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Test of a feature that is planned for ICU, but not yet implemented.
406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   skip the test.
406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            skippedUnimplementedCount++;
407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Some tests are supposed to generate errors.
407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   Only report an error for tests that are supposed to succeed.
407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_i) >= 0) {
408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // ICU should skip this test.
409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_c) >= 0) {
409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This pattern should have caused a compilation error, but didn't/
409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("line %d: Expected a pattern compile error, got success.", lineNum);
409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // replace the Perl variables that appear in some of the
410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   match data strings.
410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString matchString = fields[1];
410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(nulnulSrc, nulnul);
410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(ffffSrc,   ffff);
410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Replace any \n in the match string with an actual new-line char.
411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Don't do full unescape, as this unescapes more than Perl does, which
411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  causes other spurious failures in the tests.
411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Run the test, check for expected match/don't match result.
411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexMatcher *testMat = testPat->matcher(matchString, status);
412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool found = testMat->find();
412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool expected = FALSE;
412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fields[2].indexOf(UChar_y) >=0) {
412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            expected = TRUE;
412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (expected != found) {
412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errln("line %d: Expected %smatch, got %smatch",
412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                lineNum, expected?"":"no ", found?"":"no " );
412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Don't try to check expected results if there is no match.
413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   (Some have stuff in the expected fields)
413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!found) {
413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testMat;
413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete testPat;
413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Interpret the Perl expression from the fourth field of the data file,
414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // building up an ICU string from the results of the ICU match.
414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   The Perl expression will contain references to the results of
414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     a regex match, including the matched string, capture group strings,
414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //     group starting and ending indicies, etc.
414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString resultString;
414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString perlExpr = fields[3];
414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if SUPPORT_MUTATING_INPUT_STRING
414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        groupsMat->reset(perlExpr);
415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cgMat->reset(perlExpr);
415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (perlExpr.length() > 0) {
415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !SUPPORT_MUTATING_INPUT_STRING
415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Perferred usage.  Reset after any modification to input string.
415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            groupsMat->reset(perlExpr);
415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cgMat->reset(perlExpr);
415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (perlExpr.startsWith("$&")) {
416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(testMat->group(status));
416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (groupsMat->lookingAt(status)) {
416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // $-[0]   $+[2]  etc.
416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString digitString = groupsMat->group(2, status);
416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t t = 0;
416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString plusOrMinus = groupsMat->group(1, status);
417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t matchPosition;
417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (plusOrMinus.compare("+") == 0) {
417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    matchPosition = testMat->end(groupNum, status);
417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    matchPosition = testMat->start(groupNum, status);
417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (matchPosition != -1) {
417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, matchPosition);
417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, groupsMat->end(status));
418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (cgMat->lookingAt(status)) {
418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // $1, $2, $3, etc.
418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeString digitString = cgMat->group(1, status);
418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t t = 0;
418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_SUCCESS(status)) {
418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    resultString.append(testMat->group(groupNum, status));
419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    status = U_ZERO_ERROR;
419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, cgMat->end(status));
419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith("@-")) {
419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<=testMat->groupCount(); i++) {
419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (i>0) {
419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        resultString.append(" ");
420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith("@+")) {
420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<=testMat->groupCount(); i++) {
420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (i>0) {
421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        resultString.append(" ");
421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 2);
421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                     //           or as an escaped sequence (e.g. \n)
421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (perlExpr.length() > 1) {
422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c = perlExpr.charAt(0);
422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                switch (c) {
422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                case 'n':   c = '\n'; break;
422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // add any other escape sequences that show up in the test expected results.
422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(c);
422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 1);
422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            else  {
423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Any characters from the perl expression that we don't explicitly
423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  recognize before here are assumed to be literals and copied
423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  as-is to the expected results.
423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                resultString.append(perlExpr.charAt(0));
423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                perlExpr.remove(0, 1);
423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(status)) {
424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
424350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Expected Results Compare
424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString expectedS(fields[4]);
424950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(nulnulSrc, nulnul);
425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(ffffSrc,   ffff);
425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
425450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (expectedS.compare(resultString) != 0) {
425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            err("Line %d: Incorrect perl expression results.", lineNum);
425650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
4258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete testMat;
426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete testPat;
4261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // All done.  Clean up allocated stuff.
4265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete cgMat;
426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete cgPat;
4268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete groupsMat;
427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete groupsPat;
4271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete flagMat;
427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete flagPat;
4274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete lineMat;
427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete linePat;
4277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete fieldPat;
427950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] testData;
428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------
4288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
428950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   PerlTestsUTF8  Run Perl's regular expression tests on UTF-8-based UTexts
429050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  (instead of using UnicodeStrings) to test the alternate engine.
429150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  The input file for this test is re_tests, the standard regular
429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  expression test data distributed with the Perl source code.
429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  See PerlTests() for more information.
4294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------
429650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PerlTestsUTF8() {
4297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char tdd[2048];
4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *srcPath;
4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError pe;
430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status));
430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText       patternText = UTEXT_INITIALIZER;
430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char       *patternChars = NULL;
430450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     patternLength;
430550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     patternCapacity = 0;
430650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText       inputText = UTEXT_INITIALIZER;
430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char       *inputChars = NULL;
430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     inputLength;
430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     inputCapacity = 0;
431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
4312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Open and read the test data file.
4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    srcPath=getPath(tdd, "re_tests.txt");
4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcPath==NULL) {
4318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return; /* something went wrong, error already output */
4319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t    len;
4322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return; /* something went wrong, error already output */
4325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Put the test data into a UnicodeString
4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString testDataString(FALSE, testData, len);
4331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to break the input file into lines, and strip the new lines.
4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     One line per match, capture group one is the desired data.
4335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        dataerrln("RegexPattern::compile() error");
4339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
4342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to split a test file line into fields.
4345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    There are six fields, separated by tabs.
4346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Regex to identify test patterns with flag settings, and to separate them.
4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Test patterns with flags look like 'pattern'i
4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Test patterns without flags are not quoted:   pattern
4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
4356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher* flagMat = flagPat->matcher(status);
4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The Perl tests reference several perl-isms, which are evaluated/substituted
4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   in the test data.  Not being perl, this must be done explicitly.  Here
4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   are string constants and REs for these constructs.
4362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString nulnulSrc("${nulnul}");
4364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
4365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    nulnul = nulnul.unescape();
4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString ffffSrc("${ffff}");
4368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString ffff("\\uffff", -1, US_INV);
4369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ffff = ffff.unescape();
4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  regexp for $-[0], $+[2], etc.
4372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *groupsMat = groupsPat->matcher(status);
4374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  regexp for $0, $1, $2, etc.
4376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
4377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *cgMat = cgPat->matcher(status);
4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Main Loop for the Perl Tests, runs once per line from the
4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   test data file.
4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  lineNum = 0;
4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  skippedUnimplementedCount = 0;
4386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (lineMat->find()) {
4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lineNum++;
4388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Get a line, break it into its fields, do the Perl
4391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    variable substitutions.
4392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString line = lineMat->group(1, status);
4394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString fields[7];
4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fieldPat->split(line, fields, 7, status);
4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flagMat->reset(fields[0]);
4398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flagMat->matches(status);
4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString pattern  = flagMat->group(2, status);
4400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.findAndReplace("${bang}", "!");
4401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.findAndReplace(ffffSrc, ffff);
4403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Identify patterns that include match flag settings,
4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    split off the flags, remove the extra quotes.
4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString flagStr = flagMat->group(3, status);
4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
4411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return;
4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t flags = 0;
4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_m = 0x6d;
4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_x = 0x78;
4418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar UChar_y = 0x79;
4419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_i) != -1) {
4420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_CASE_INSENSITIVE;
4421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_m) != -1) {
4423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_MULTILINE;
4424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (flagStr.indexOf(UChar_x) != -1) {
4426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flags |= UREGEX_COMMENTS;
4427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
443050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Put the pattern in a UTF-8 UText
443150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
443250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
443350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
443450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_BUFFER_OVERFLOW_ERROR) {
443550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete[] patternChars;
443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            patternCapacity = patternLength + 1;
443850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            patternChars = new char[patternCapacity];
443950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
444050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
444150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&patternText, patternChars, patternLength, &status);
4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Compile the test pattern.
4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
444650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status);
4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (status == U_REGEX_UNIMPLEMENTED) {
4448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //
4449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Test of a feature that is planned for ICU, but not yet implemented.
4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   skip the test.
4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            skippedUnimplementedCount++;
4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
4454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Some tests are supposed to generate errors.
4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   Only report an error for tests that are supposed to succeed.
4460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
4461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_ZERO_ERROR;
4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_i) >= 0) {
4471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // ICU should skip this test.
4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_c) >= 0) {
4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This pattern should have caused a compilation error, but didn't.
4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("line %d: Expected a pattern compile error, got success.", lineNum);
4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete testPat;
4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
448350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // replace the Perl variables that appear in some of the
4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   match data strings.
4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString matchString = fields[1];
4489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matchString.findAndReplace(nulnulSrc, nulnul);
4490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        matchString.findAndReplace(ffffSrc,   ffff);
4491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Replace any \n in the match string with an actual new-line char.
4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Don't do full unescape, as this unescapes more than Perl does, which
4494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  causes other spurious failures in the tests.
4495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Put the input in a UTF-8 UText
449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //
450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
450150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
450250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (status == U_BUFFER_OVERFLOW_ERROR) {
450350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
450450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete[] inputChars;
450550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            inputCapacity = inputLength + 1;
450650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            inputChars = new char[inputCapacity];
450750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
450850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
450950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUTF8(&inputText, inputChars, inputLength, &status);
4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Run the test, check for expected match/don't match result.
4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText);
4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool found = testMat->find();
4516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool expected = FALSE;
4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fields[2].indexOf(UChar_y) >=0) {
4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expected = TRUE;
4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expected != found) {
4521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("line %d: Expected %smatch, got %smatch",
4522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lineNum, expected?"":"no ", found?"":"no " );
4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Don't try to check expected results if there is no match.
4527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   (Some have stuff in the expected fields)
4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (!found) {
4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            delete testMat;
4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            delete testPat;
4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            continue;
4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Interpret the Perl expression from the fourth field of the data file,
4536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // building up an ICU string from the results of the ICU match.
4537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   The Perl expression will contain references to the results of
4538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //     a regex match, including the matched string, capture group strings,
4539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //     group starting and ending indicies, etc.
4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString resultString;
4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString perlExpr = fields[3];
4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (perlExpr.length() > 0) {
454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            groupsMat->reset(perlExpr);
454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            cgMat->reset(perlExpr);
454750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (perlExpr.startsWith("$&")) {
4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(testMat->group(status));
4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (groupsMat->lookingAt(status)) {
4554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // $-[0]   $+[2]  etc.
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString digitString = groupsMat->group(2, status);
4556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t t = 0;
4557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
4558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString plusOrMinus = groupsMat->group(1, status);
4559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t matchPosition;
4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (plusOrMinus.compare("+") == 0) {
4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    matchPosition = testMat->end(groupNum, status);
4562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
4563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    matchPosition = testMat->start(groupNum, status);
4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (matchPosition != -1) {
4566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, matchPosition);
4567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, groupsMat->end(status));
4569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (cgMat->lookingAt(status)) {
4572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // $1, $2, $3, etc.
4573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString digitString = cgMat->group(1, status);
4574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t t = 0;
4575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
4576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_SUCCESS(status)) {
4577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    resultString.append(testMat->group(groupNum, status));
4578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    status = U_ZERO_ERROR;
4579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, cgMat->end(status));
4581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (perlExpr.startsWith("@-")) {
4584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
4585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i=0; i<=testMat->groupCount(); i++) {
4586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i>0) {
4587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        resultString.append(" ");
4588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
4590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (perlExpr.startsWith("@+")) {
4595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i=0; i<=testMat->groupCount(); i++) {
4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i>0) {
4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        resultString.append(" ");
4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 2);
4603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                     //           or as an escaped sequence (e.g. \n)
4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (perlExpr.length() > 1) {
4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar c = perlExpr.charAt(0);
4611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                switch (c) {
4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'n':   c = '\n'; break;
4613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // add any other escape sequences that show up in the test expected results.
4614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(c);
4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 1);
4617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else  {
4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Any characters from the perl expression that we don't explicitly
4621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  recognize before here are assumed to be literals and copied
4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  as-is to the expected results.
4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                resultString.append(perlExpr.charAt(0));
4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                perlExpr.remove(0, 1);
4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(status)) {
4628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Expected Results Compare
4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString expectedS(fields[4]);
4637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectedS.findAndReplace(nulnulSrc, nulnul);
4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectedS.findAndReplace(ffffSrc,   ffff);
4639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expectedS.compare(resultString) != 0) {
4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            err("Line %d: Incorrect perl expression results.", lineNum);
464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testMat;
4648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testPat;
4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // All done.  Clean up allocated stuff.
4653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cgMat;
4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cgPat;
4656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete groupsMat;
4658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete groupsPat;
4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete flagMat;
4661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete flagPat;
4662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete lineMat;
4664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete linePat;
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fieldPat;
4667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete [] testData;
466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
466950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
467050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
467150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
467250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] patternChars;
467350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete [] inputChars;
4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------
4682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
4683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//  Bug6149   Verify limits to heap expansion for backtrack stack.
4684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//             Use this pattern,
4685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                 "(a?){1,}"
4686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//             The zero-length match will repeat forever.
4687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//                (That this goes into a loop is another bug)
4688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//
4689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//---------------------------------------------------------------
4690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid RegexTest::Bug6149() {
    // The group (a?) can match the empty string and is repeated with an
    // open-ended {1,} quantifier; the input "xyz" contains no 'a' at all.
4691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString pattern("(a?){1,}");
4692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeString s("xyz");
4693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    uint32_t flags = 0;
4694b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
4695b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4696b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    RegexMatcher  matcher(pattern, s, flags, status);
4697b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UBool result = false;
    // The match attempt is expected to end with U_REGEX_STACK_OVERFLOW rather
    // than growing the backtrack stack without bound.
    // NOTE(review): REGEX_ASSERT_FAIL is defined outside this chunk; presumably
    // it evaluates the expression and checks the resulting status against the
    // given error code -- confirm against the macro definition.
4698b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW);
    // A match that was cut short must not be reported as a successful match.
4699b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    REGEX_ASSERT(result == FALSE);
4700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }
4701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
4704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//   Callbacks()    Test the callback function.
4705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  When set, callbacks occur periodically during matching operations,
4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  giving the application code the ability to abort the operation
4707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  before its normal completion.
4708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
4709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct callBackContext {
4711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexTest        *test;
4712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          maxCalls;
4713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          numCalls;
4714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t          lastSteps;
4715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;};
4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru};
4717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_BEGIN
4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic UBool U_CALLCONV
4720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QuerutestCallBackFn(const void *context, int32_t steps) {
4721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    callBackContext  *info = (callBackContext *)context;
4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (info->lastSteps+1 != steps) {
4723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        info->test->errln("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    info->lastSteps = steps;
4726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    info->numCalls++;
4727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return (info->numCalls < info->maxCalls);
4728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
4729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CDECL_END
4730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
// Exercises RegexMatcher::getMatchCallback() / setMatchCallback() and checks
// how often testCallBackFn is invoked for short, medium and long-running
// matches.  The two brace-delimited scopes are independent sub-tests, each
// with its own matcher and status.
4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexTest::Callbacks() {
4732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   {
4733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Getter returns NULLs if no callback has been set
4734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   The variables that the getter will fill in.
4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   Init to non-null values so that the action of the getter can be seen.
4737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const void          *returnedContext = &returnedContext;
4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        URegexMatchCallback *returnedFn = &testCallBackFn;
4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
4741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher("x", 0, status);
4742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.getMatchCallback(returnedFn, returnedContext, status);
4744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
        // Both outputs must have been overwritten with NULL by the getter.
4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedFn == NULL);
4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedContext == NULL);
4747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   {
4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Set and Get work
        // Positional init: {test, maxCalls, numCalls, lastSteps}.
4751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        callBackContext cbInfo = {this, 0, 0, 0};
        // Left uninitialized on purpose here; the getter below fills them in.
4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        const void          *returnedContext;
4753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        URegexMatchCallback *returnedFn;
4754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
4755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.getMatchCallback(returnedFn, returnedContext, status);
4760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
        // The getter must hand back exactly what was just installed.
4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedFn == testCallBackFn);
4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(returnedContext == &cbInfo);
4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A short-running match shouldn't invoke the callback
4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
        // With a limit of 1, testCallBackFn would return false on its very
        // first invocation; the test expects it not to be invoked at all.
4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(1);
4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString s = "xxx";
4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status));
4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls == 0);
4772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A medium-length match that runs long enough to invoke the
4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        //   callback, but not so long that the callback aborts it.
4775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
        // With a limit of 4, testCallBackFn returns true for its first three
        // invocations and false on the fourth.
4776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(4);
        // No trailing 'x' in the input, so the pattern cannot match.
4777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = "aaaaaaaaaaaaaaaaaaab";
4778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status)==FALSE);
4780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_CHECK_STATUS;
4781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls > 0);
4782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // A longer running match that the callback function will abort.
4784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_ZERO_ERROR;
4785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        cbInfo.reset(4);
        // Same shape of input as above, but with more leading 'a' characters.
4786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        s = "aaaaaaaaaaaaaaaaaaaaaaab";
4787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        matcher.reset(s);
4788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(matcher.matches(status)==FALSE);
        // The abort must be reported through status, after exactly the
        // number of callback invocations configured by reset(4).
4789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
4790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        REGEX_ASSERT(cbInfo.numCalls == 4);
4791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
4795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
479727f654740f2a26ad62a5c155af9199af9e69b889claireho//
479827f654740f2a26ad62a5c155af9199af9e69b889claireho//   FindProgressCallbacks()    Test the find "progress" callback function.
479927f654740f2a26ad62a5c155af9199af9e69b889claireho//                  When set, the find progress callback will be invoked during a find operation
480027f654740f2a26ad62a5c155af9199af9e69b889claireho//                  after each return from a match attempt, giving the application the opportunity
480127f654740f2a26ad62a5c155af9199af9e69b889claireho//                  to terminate a long-running find operation before its normal completion.
480227f654740f2a26ad62a5c155af9199af9e69b889claireho//
480327f654740f2a26ad62a5c155af9199af9e69b889claireho
480427f654740f2a26ad62a5c155af9199af9e69b889clairehostruct progressCallBackContext {
480527f654740f2a26ad62a5c155af9199af9e69b889claireho    RegexTest        *test;
480627f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t          lastIndex;
480727f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t          maxCalls;
480827f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t          numCalls;
480927f654740f2a26ad62a5c155af9199af9e69b889claireho    void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
481027f654740f2a26ad62a5c155af9199af9e69b889claireho};
481127f654740f2a26ad62a5c155af9199af9e69b889claireho
481227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN
481327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV
481427f654740f2a26ad62a5c155af9199af9e69b889clairehotestProgressCallBackFn(const void *context, int64_t matchIndex) {
481527f654740f2a26ad62a5c155af9199af9e69b889claireho    progressCallBackContext  *info = (progressCallBackContext *)context;
481627f654740f2a26ad62a5c155af9199af9e69b889claireho    info->numCalls++;
481727f654740f2a26ad62a5c155af9199af9e69b889claireho    info->lastIndex = matchIndex;
481827f654740f2a26ad62a5c155af9199af9e69b889claireho//    info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);
481927f654740f2a26ad62a5c155af9199af9e69b889claireho    return (info->numCalls < info->maxCalls);
482027f654740f2a26ad62a5c155af9199af9e69b889claireho}
482127f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END
482227f654740f2a26ad62a5c155af9199af9e69b889claireho
482327f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::FindProgressCallbacks() {
482427f654740f2a26ad62a5c155af9199af9e69b889claireho   {
482527f654740f2a26ad62a5c155af9199af9e69b889claireho        // Getter returns NULLs if no callback has been set
482627f654740f2a26ad62a5c155af9199af9e69b889claireho
482727f654740f2a26ad62a5c155af9199af9e69b889claireho        //   The variables that the getter will fill in.
482827f654740f2a26ad62a5c155af9199af9e69b889claireho        //   Init to non-null values so that the action of the getter can be seen.
482927f654740f2a26ad62a5c155af9199af9e69b889claireho        const void                  *returnedContext = &returnedContext;
483027f654740f2a26ad62a5c155af9199af9e69b889claireho        URegexFindProgressCallback  *returnedFn = &testProgressCallBackFn;
483127f654740f2a26ad62a5c155af9199af9e69b889claireho
483227f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode status = U_ZERO_ERROR;
483327f654740f2a26ad62a5c155af9199af9e69b889claireho        RegexMatcher matcher("x", 0, status);
483427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
483527f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
483627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
483727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedFn == NULL);
483827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedContext == NULL);
483927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
484027f654740f2a26ad62a5c155af9199af9e69b889claireho
484127f654740f2a26ad62a5c155af9199af9e69b889claireho   {
484227f654740f2a26ad62a5c155af9199af9e69b889claireho        // Set and Get work
484327f654740f2a26ad62a5c155af9199af9e69b889claireho        progressCallBackContext cbInfo = {this, 0, 0, 0};
484427f654740f2a26ad62a5c155af9199af9e69b889claireho        const void                  *returnedContext;
484527f654740f2a26ad62a5c155af9199af9e69b889claireho        URegexFindProgressCallback  *returnedFn;
484627f654740f2a26ad62a5c155af9199af9e69b889claireho        UErrorCode status = U_ZERO_ERROR;
484727f654740f2a26ad62a5c155af9199af9e69b889claireho        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
484827f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
484927f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
485027f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
485127f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
485227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
485327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedFn == testProgressCallBackFn);
485427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(returnedContext == &cbInfo);
485527f654740f2a26ad62a5c155af9199af9e69b889claireho
485627f654740f2a26ad62a5c155af9199af9e69b889claireho        // A short-running match should NOT invoke the callback.
485727f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
485827f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(100);
485927f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s = "abxxx";
486027f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s);
486127f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
486227f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.setTrace(TRUE);
486327f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
486427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status));
486527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
486627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls == 0);
486727f654740f2a26ad62a5c155af9199af9e69b889claireho
486827f654740f2a26ad62a5c155af9199af9e69b889claireho        // A medium running match that causes matcher.find() to invoke our callback for each index.
486927f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
487027f654740f2a26ad62a5c155af9199af9e69b889claireho        s = "aaaaaaaaaaaaaaaaaaab";
487127f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s.length()); //  Some upper limit for number of calls that is greater than size of our input string
487227f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s);
487327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
487427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
487527f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
487627f654740f2a26ad62a5c155af9199af9e69b889claireho
487727f654740f2a26ad62a5c155af9199af9e69b889claireho        // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
487827f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
487927f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
488027f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s1.length() - 5); //  Bail early somewhere near the end of input string
488127f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s1);
488227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
488327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
488427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
488527f654740f2a26ad62a5c155af9199af9e69b889claireho
488627f654740f2a26ad62a5c155af9199af9e69b889claireho#if 0
488727f654740f2a26ad62a5c155af9199af9e69b889claireho        // Now a match that will succeed, but after an interruption
488827f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_ZERO_ERROR;
488927f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
489027f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.reset(s2.length() - 10); //  Bail early somewhere near the end of input string
489127f654740f2a26ad62a5c155af9199af9e69b889claireho        matcher.reset(s2);
489227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(0, status)==FALSE);
489327f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
489427f654740f2a26ad62a5c155af9199af9e69b889claireho        // Now retry the match from where left off
489527f654740f2a26ad62a5c155af9199af9e69b889claireho        cbInfo.maxCalls = 100; //  No callback limit
489627f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
489727f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_CHECK_STATUS;
489827f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
489927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
490027f654740f2a26ad62a5c155af9199af9e69b889claireho
490127f654740f2a26ad62a5c155af9199af9e69b889claireho
490227f654740f2a26ad62a5c155af9199af9e69b889claireho}
490327f654740f2a26ad62a5c155af9199af9e69b889claireho
490427f654740f2a26ad62a5c155af9199af9e69b889claireho
490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
490750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    PreAllocatedUTextCAPI    Check the C API with pre-allocated mutable
490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             UTexts. The pure-C implementation of UText
490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             has no mutable backing stores, but we can
491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                             use UnicodeString here to test the functionality.
491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------
491350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::PreAllocatedUTextCAPI () {
491450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode           status = U_ZERO_ERROR;
491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    URegularExpression  *re;
491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText                patternText = UTEXT_INITIALIZER;
491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString        buffer;
491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText                bufferText = UTEXT_INITIALIZER;
491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&bufferText, &buffer, &status);
492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  getText() and getUText()
492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  text1 = UTEXT_INITIALIZER;
492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  text2 = UTEXT_INITIALIZER;
492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar  text2Chars[20];
492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText  *resultText;
493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
493227f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);
493327f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUChars(&text2, text2Chars, -1, &status);
493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
493727f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openUText(&patternText, 0, NULL, &status);
493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
494050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* First set a UText */
494150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setUText(re, &text1, &status);
494250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
494350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text1, 0);
4947103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        REGEX_ASSERT(testUTextEqual(resultText, &text1));
494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
495250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
495350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text1, 0);
4954103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        REGEX_ASSERT(testUTextEqual(resultText, &text1));
495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Then set a UChar * */
495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2Chars, 7, &status);
495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultText = uregex_getUText(re, &bufferText, &status);
495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
496050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(resultText == &bufferText);
496150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(resultText, 0);
496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_setNativeIndex(&text2, 0);
4963103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        REGEX_ASSERT(testUTextEqual(resultText, &text2));
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&text1);
496750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&text2);
496850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  group()
497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
497350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *actual;
497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool    result;
497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
498050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
498150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
498250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
498350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_find(re, 0, &status);
498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result==TRUE);
498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
498750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture Group 0, the full match.  Should succeed.  */
498850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
498927f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
499050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
499227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
499450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture group #1.  Should succeed. */
499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
499627f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
499850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
499927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
500050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Capture group out of range.  Error. */
500250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
500327f654740f2a26ad62a5c155af9199af9e69b889claireho        actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
500450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
500550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(actual == &bufferText);
500650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
501050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
501150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
501250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  replaceFirst()
501350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
501450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
501550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
501650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text2[80];
501750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText    replText = UTEXT_INITIALIZER;
501850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *result;
501950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
502050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
502150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
502327f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
502450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
502550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("x(.*?)x", 0, NULL, &status);
502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
502850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Normal case, with match */
502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
503050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
503150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
503250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
503427f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);
503550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
503650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* No match.  Text should copy to output with no changes.  */
503750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2, -1, &status);
503850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
503950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
504050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
504227f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Unicode escapes */
504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
504627f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
505127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);
505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replText);
505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  replaceAll()
506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
506150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text1[80];
506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar    text2[80];
506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText    replText = UTEXT_INITIALIZER;
506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText   *result;
506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
507027f654740f2a26ad62a5c155af9199af9e69b889claireho        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
507250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        re = uregex_openC("x(.*?)x", 0, NULL, &status);
507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*  Normal case, with match */
507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text1, -1, &status);
507750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
508050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
508127f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result);
508250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
508350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* No match.  Text should copy to output with no changes.  */
508450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_setText(re, text2, -1, &status);
508550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
508650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
508750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_CHECK_STATUS;
508850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        REGEX_ASSERT(result == &bufferText);
508927f654740f2a26ad62a5c155af9199af9e69b889claireho        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
509050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uregex_close(re);
509250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replText);
509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
509450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
509550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
509650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
509750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
509850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *   so we don't need to test it here.
509950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
510050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
510150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&bufferText);
510250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&patternText);
510350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
510450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
510550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------
510650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
510750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  Bug7651   Regex pattern that exceeds default operator stack depth in matcher.
510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------
511050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexTest::Bug7651() {
511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)");
511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData.
511327f654740f2a26ad62a5c155af9199af9e69b889claireho    //  It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation.
511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)");
511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString s("#ff @abcd This is test");
511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern  *REPattern = NULL;
511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher  *REMatcher = NULL;
511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError pe;
512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(pattern1, 0, pe, status);
512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REMatcher = REPattern->matcher(s, status);
512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->find());
512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->start(status) == 0);
512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REPattern = RegexPattern::compile(pattern2, 0, pe, status);
513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
513350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REMatcher = REPattern->matcher(s, status);
513450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_CHECK_STATUS;
513550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->find());
513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REGEX_ASSERT(REMatcher->start(status) == 0);
513750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REPattern;
513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete REMatcher;
513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }
514150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
514227f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexTest::Bug7740() {
514327f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode status = U_ZERO_ERROR;
514427f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString pattern = "(a)";
514527f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString text = "abcdef";
514627f654740f2a26ad62a5c155af9199af9e69b889claireho    RegexMatcher *m = new RegexMatcher(pattern, text, 0, status);
514727f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
514827f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(m->lookingAt(status));
514927f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_CHECK_STATUS;
515027f654740f2a26ad62a5c155af9199af9e69b889claireho    status = U_ILLEGAL_ARGUMENT_ERROR;
515127f654740f2a26ad62a5c155af9199af9e69b889claireho    UnicodeString s = m->group(1, status);    // Bug 7740: segfault here.
515227f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
515327f654740f2a26ad62a5c155af9199af9e69b889claireho    REGEX_ASSERT(s == "");
515427f654740f2a26ad62a5c155af9199af9e69b889claireho    delete m;
515527f654740f2a26ad62a5c155af9199af9e69b889claireho}
515627f654740f2a26ad62a5c155af9199af9e69b889claireho
5157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 8479:  was crashing whith a Bogus UnicodeString as input.
5158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug8479() {
5160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UErrorCode status = U_ZERO_ERROR;
516127f654740f2a26ad62a5c155af9199af9e69b889claireho
5162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status);
5163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status))
5165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    {
5166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString str;
5167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        str.setToBogus();
5168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pMatcher->reset(str);
5169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ZERO_ERROR;
5170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pMatcher->matches(status);
5171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
5172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        delete pMatcher;
5173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
5174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
517527f654740f2a26ad62a5c155af9199af9e69b889claireho
517627f654740f2a26ad62a5c155af9199af9e69b889claireho
5177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Bug 7029
5178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::Bug7029() {
5179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UErrorCode status = U_ZERO_ERROR;
5180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);
5182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString text = "abc.def";
5183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString splits[10];
5184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t numFields = pMatcher->split(text, splits, 10, status);
5186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_CHECK_STATUS;
5187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    REGEX_ASSERT(numFields == 8);
5188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete pMatcher;
5189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
5190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5191103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Bug 9283
5192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   This test is checking for the existance of any supplemental characters that case-fold
5193103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   to a bmp character.
5194103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//
5195103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   At the time of this writing there are none. If any should appear in a subsequent release
5196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   of Unicode, the code in regular expressions compilation that determines the longest
5197103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   posssible match for a literal string  will need to be enhanced.
5198103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//
5199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
5200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//   for details on what to do in case of a failure of this test.
5201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//
5202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid RegexTest::Bug9283() {
5203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UErrorCode status = U_ZERO_ERROR;
5204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);
5205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    REGEX_CHECK_STATUS;
5206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t index;
5207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UChar32 c;
5208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for (index=0; ; index++) {
5209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        c = supplementalsWithCaseFolding.charAt(index);
5210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if (c == -1) {
5211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
5212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
5213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        UnicodeString cf = UnicodeString(c).foldCase();
5214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        REGEX_ASSERT(cf.length() >= 2);
5215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
5216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
5217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid RegexTest::CheckInvBufSize() {
5220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  if(inv_next>=INV_BUFSIZ) {
5221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n",
5222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          __FILE__, INV_BUFSIZ, inv_next);
5223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  } else {
5224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);
5225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  }
5226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
5227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
5228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
5229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5230