16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/********************************************************************
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * COPYRIGHT:
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (c) 2002-2013, International Business Machines Corporation and
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * others. All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ********************************************************************/
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   regextst.cpp
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      ICU Regular Expressions test, part of intltest.
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     NOTE!!
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     PLEASE be careful about ASCII assumptions in this test.
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     This test is one of the worst repeat offenders.
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     If you have questions, contact someone on the ICU PMC
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     who has access to an EBCDIC system.
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "intltest.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/regex.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h"
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regextst.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h"
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "util.h"
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdlib.h>
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <string.h>
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h>
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uinvchar.h"
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SUPPORT_MUTATING_INPUT_STRING   0
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Test class boilerplate
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::RegexTest()
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::~RegexTest()
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (exec) logln("TestSuite RegexTest: ");
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (index) {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 0: name = "Basic";
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Basic();
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 1: name = "API_Match";
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) API_Match();
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 2: name = "API_Replace";
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) API_Replace();
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 3: name = "API_Pattern";
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) API_Pattern();
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 4:
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_FILE_IO
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            name = "Extended";
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Extended();
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            name = "skip";
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 5: name = "Errors";
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Errors();
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 6: name = "PerlTests";
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) PerlTests();
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 7: name = "Callbacks";
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Callbacks();
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 8: name = "FindProgressCallbacks";
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) FindProgressCallbacks();
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 9: name = "Bug 6149";
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             if (exec) Bug6149();
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             break;
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 10: name = "UTextBasic";
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) UTextBasic();
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 11: name = "API_Match_UTF8";
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) API_Match_UTF8();
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 12: name = "API_Replace_UTF8";
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) API_Replace_UTF8();
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 13: name = "API_Pattern_UTF8";
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) API_Pattern_UTF8();
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 14: name = "PerlTestsUTF8";
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) PerlTestsUTF8();
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 15: name = "PreAllocatedUTextCAPI";
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (exec) PreAllocatedUTextCAPI();
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          break;
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 16: name = "Bug 7651";
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             if (exec) Bug7651();
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             break;
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 17: name = "Bug 7740";
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Bug7740();
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 18: name = "Bug 8479";
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Bug8479();
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 19: name = "Bug 7029";
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Bug7029();
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 20: name = "CheckInvBufSize";
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) CheckInvBufSize();
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case 21: name = "Bug 9283";
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (exec) Bug9283();
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default: name = "";
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break; //needed to end loop
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * into ASCII.
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see utext_openUTF8
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   Error Checking / Reporting macros used in all of the tests.
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void utextToPrintable(char *buf, int32_t bufLen, UText *text) {
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int64_t oldIndex = utext_getNativeIndex(text);
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  utext_setNativeIndex(text, 0);
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  char *bufPtr = buf;
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UChar32 c = utext_next32From(text, 0);
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  while ((c != U_SENTINEL) && (bufPtr < buf+bufLen)) {
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (0x000020<=c && c<0x00007e) {
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      *bufPtr = c;
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      sprintf(bufPtr,"U+%04X", c);
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      bufPtr+= strlen(bufPtr)-1;
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      *bufPtr = '%';
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bufPtr++;
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c = UTEXT_NEXT32(text);
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *bufPtr = 0;
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  char *ebuf = (char*)malloc(bufLen);
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  uprv_eastrncpy((unsigned char*)ebuf, (const unsigned char*)buf, bufLen);
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  uprv_strncpy(buf, ebuf, bufLen);
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  free((void*)ebuf);
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  utext_setNativeIndex(text, oldIndex);
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic char ASSERT_BUF[1024];
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char* RegexTest::extractToAssertBuf(const UnicodeString& message) {
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(message.length()==0) {
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    strcpy(ASSERT_BUF, "[[empty UnicodeString]]");
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString buf;
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    IntlTest::prettify(message,buf);
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(buf.length()==0) {
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      strcpy(ASSERT_BUF, "[[escape() returned 0 chars]]");
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      buf.extract(0, 0x7FFFFFFF, ASSERT_BUF, sizeof(ASSERT_BUF)-1);
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      if(ASSERT_BUF[0]==0) {
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ASSERT_BUF[0]=0;
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(int32_t i=0;i<buf.length();i++) {
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          UChar ch = buf[i];
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      }
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return ASSERT_BUF;
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Logs a printable rendering of a UText, tagged with file, line and the
// spelling of the macro argument.
#define REGEX_VERBOSE_TEXT(text) { \
    char buf[200]; \
    utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text); \
    logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf); \
}

// If the local variable `status` holds a failure code, reports it through
// dataerrln() and returns from the enclosing function.
#define REGEX_CHECK_STATUS { \
    if (U_FAILURE(status)) { \
        dataerrln("%s:%d: RegexTest failure.  status=%s", \
                  __FILE__, __LINE__, u_errorName(status)); \
        return; \
    } \
}

// Reports an error if expr compares equal to FALSE. Does not return.
#define REGEX_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr); \
    } \
}

// Evaluates expr with a fresh local `status` (so expr is expected to refer to
// a variable of that name) and reports an error unless status ends up equal
// to errcode.
#define REGEX_ASSERT_FAIL(expr, errcode) { \
    UErrorCode status=U_ZERO_ERROR; \
    (expr); \
    if (status!=errcode) { \
        dataerrln("RegexTest failure at line %d.  Expected status=%s, got %s", \
                  __LINE__, u_errorName(errcode), u_errorName(status)); \
    } \
}

// Like REGEX_CHECK_STATUS, but also prints the caller-supplied line number
// and does not return from the enclosing function.
#define REGEX_CHECK_STATUS_L(line) { \
    if (U_FAILURE(status)) { \
        errln("RegexTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); \
    } \
}

// Like REGEX_ASSERT, but also prints the caller-supplied line number and
// returns from the enclosing function on failure.
#define REGEX_ASSERT_L(expr, line) { \
    if ((expr)==FALSE) { \
        errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); \
        return; \
    } \
}

// Reports an error, including a printable form of ustr, unless ustr==inv.
#define REGEX_ASSERT_UNISTR(ustr,inv) { \
    if (!(ustr==inv)) { \
        errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", \
              __FILE__, __LINE__, extractToAssertBuf(ustr),inv); \
    } \
}
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool testUTextEqual(UText *uta, UText *utb) {
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 ca = 0;
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 cb = 0;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_setNativeIndex(uta, 0);
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_setNativeIndex(utb, 0);
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ca = utext_next32(uta);
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cb = utext_next32(utb);
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ca != cb) {
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while (ca != U_SENTINEL);
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return ca == cb;
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param expected expected text in UTF-8 (not platform) codepage
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::assertUText(const char *expected, UText *actual, const char *file, int line) {
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText expectedText = UTEXT_INITIALIZER;
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&expectedText, expected, -1, &status);
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(status)) {
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      errln("%s:%d: assertUText: error %s calling utext_openUTF8(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return;
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(utext_nativeLength(&expectedText)==0 && (strlen(expected)!=0)) {
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      errln("%s:%d: assertUText:  expected is %d utf-8 bytes, but utext_nativeLength(expectedText) returned 0.", file, line, strlen(expected));
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return;
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_setNativeIndex(actual, 0);
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!testUTextEqual(&expectedText, actual)) {
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        char buf[201 /*21*/];
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        char expectedBuf[201];
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&expectedText);
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param expected invariant (platform local text) input
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::assertUTextInvariant(const char *expected, UText *actual, const char *file, int line) {
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText expectedText = UTEXT_INITIALIZER;
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&expectedText, expected, -1, &status);
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(status)) {
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      errln("%s:%d: assertUTextInvariant: error %s calling regextst_openUTF8FromInvariant(expected: %d chars)\n", file, line, u_errorName(status), strlen(expected));
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return;
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_setNativeIndex(actual, 0);
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!testUTextEqual(&expectedText, actual)) {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        char buf[201 /*21*/];
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        char expectedBuf[201];
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&expectedText);
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Checks a UText against expected text given in UTF-8; forwards to
 * assertUText() with the invocation's file and line.
 */
#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) \
    assertUText((expected), (actual), __FILE__, __LINE__)

/**
 * Checks a UText against expected text given as invariant characters;
 * forwards to assertUTextInvariant() with the invocation's file and line.
 */
#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) \
    assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * This buffer ( inv_buf ) is used to hold the UTF-8 strings
 * passed into utext_openUTF8. An error will be given if
 * INV_BUFSIZ is too small.  It's only used on EBCDIC systems.
 */

#define INV_BUFSIZ 2048 /* increase this if too small */

// Running total of the byte lengths passed through
// regextst_openUTF8FromInvariant(). On non-ASCII builds it is also the offset
// in inv_buf at which the next converted string is stored.
static int64_t inv_next=0;

// The backing storage itself is only compiled for non-ASCII charset families.
#if U_CHARSET_FAMILY!=U_ASCII_FAMILY
static char inv_buf[INV_BUFSIZ];
#endif
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) {
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(length==-1) length=strlen(inv);
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_CHARSET_FAMILY==U_ASCII_FAMILY
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  inv_next+=length;
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return utext_openUTF8(ut, inv, length, status);
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(inv_next+length+1>INV_BUFSIZ) {
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fprintf(stderr, "%s:%d Error: INV_BUFSIZ #defined to be %d but needs to be at least %d.\n",
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            __FILE__, __LINE__, INV_BUFSIZ, (inv_next+length+1));
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *status = U_MEMORY_ALLOCATION_ERROR;
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return NULL;
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  unsigned char *buf = (unsigned char*)inv_buf+inv_next;
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  uprv_aestrncpy(buf, (const uint8_t*)inv, length);
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  inv_next+=length;
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  fprintf(stderr, " Note: INV_BUFSIZ at %d, used=%d\n", INV_BUFSIZ, inv_next);
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return utext_openUTF8(ut, (const char*)buf, length, status);
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    REGEX_TESTLM       Macro + invocation function to simplify writing quick tests
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       for the LookingAt() and  Match() functions.
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       usage:
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          REGEX_TESTLM("pattern",  "input text",  lookingAt expected, matches expected);
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          The expected results are UBool - TRUE or FALSE.
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          The input text is unescaped.  The pattern is not.
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Runs the same lookingAt()/matches() expectation twice: once through the
// UnicodeString-based helper and once through the UTF-8 UText-based helper.
// __LINE__ is evaluated at the point of use, so both helpers report the
// line of the REGEX_TESTLM invocation.
#define REGEX_TESTLM(pat, text, looking, match) { \
    doRegexLMTest(pat, text, looking, match, __LINE__); \
    doRegexLMTestUTF8(pat, text, looking, match, __LINE__); \
}
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeString pattern(pat, -1, US_INV);
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeString inputText(text, -1, US_INV);
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status  = U_ZERO_ERROR;
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *REPattern = NULL;
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher        *REMatcher = NULL;
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               retVal     = TRUE;
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString patString(pat, -1, US_INV);
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REPattern = RegexPattern::compile(patString, 0, pe, status);
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("RegexTest failure in RegexPattern::compile() at line %d.  Status = %s",
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (line==376) { RegexPatternDump(REPattern);}
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString inputString(inputText);
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString unEscapedInput = inputString.unescape();
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REMatcher = REPattern->matcher(unEscapedInput, status);
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in REPattern::matcher() at line %d.  Status = %s\n",
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool actualmatch;
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    actualmatch = REMatcher->lookingAt(status);
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in lookingAt() at line %d.  Status = %s\n",
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal =  FALSE;
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (actualmatch != looking) {
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    actualmatch = REMatcher->matches(status);
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in matches() at line %d.  Status = %s\n",
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (actualmatch != match) {
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest: wrong return from matches() at line %d.\n", line);
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (retVal == FALSE) {
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPatternDump(REPattern);
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REPattern;
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REMatcher;
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return retVal;
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText               pattern    = UTEXT_INITIALIZER;
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             inputUTF8Length;
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char                *textChars = NULL;
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText               inputText  = UTEXT_INITIALIZER;
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status     = U_ZERO_ERROR;
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *REPattern = NULL;
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher        *REMatcher = NULL;
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               retVal     = TRUE;
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REPattern = RegexPattern::compile(&pattern, 0, pe, status);
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8).  Status = %s\n",
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString inputString(text, -1, US_INV);
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString unEscapedInput = inputString.unescape();
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // UTF-8 does not allow unpaired surrogates, so this could actually happen
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("RegexTest unable to convert input to UTF8 at line %d.  Status = %s\n", line, u_errorName(status));
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE; // not a failure of the Regex engine
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR; // buffer overflow
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    textChars = new char[inputUTF8Length+1];
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REMatcher = &REPattern->matcher(status)->reset(&inputText);
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in REPattern::matcher() at line %d (UTF8).  Status = %s\n",
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool actualmatch;
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    actualmatch = REMatcher->lookingAt(status);
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in lookingAt() at line %d (UTF8).  Status = %s\n",
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal =  FALSE;
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (actualmatch != looking) {
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest: wrong return from lookingAt() at line %d (UTF8).\n", line);
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    actualmatch = REMatcher->matches(status);
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest failure in matches() at line %d (UTF8).  Status = %s\n",
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, u_errorName(status));
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (actualmatch != match) {
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", line);
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = FALSE;
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (retVal == FALSE) {
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPatternDump(REPattern);
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REPattern;
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REMatcher;
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&inputText);
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&pattern);
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete[] textChars;
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return retVal;
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
//    REGEX_ERR       Macro + invocation function to simplify writing
//                       regex tests for incorrect patterns
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       usage:
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          REGEX_ERR("pattern",   expected error line, column, expected status);
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
// Forwards to regex_err(), appending the line number of the invocation.
// The expansion itself ends with ';'.
#define REGEX_ERR(pat, errLine, errCol, expectedStatus) \
    regex_err(pat, errLine, errCol, expectedStatus, __LINE__);
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          UErrorCode expectedStatus, int32_t line) {
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString       pattern(pat);
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status         = U_ZERO_ERROR;
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *callerPattern = NULL;
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Compile the caller's pattern
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString patString(pat);
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callerPattern = RegexPattern::compile(patString, 0, pe, status);
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (status != expectedStatus) {
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status != U_ZERO_ERROR) {
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (pe.line != errLine || pe.offset != errCol) {
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    line, errLine, errCol, pe.line, pe.offset);
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete callerPattern;
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Compile again, using a UTF-8-based UText
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText patternText = UTEXT_INITIALIZER;
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&patternText, pat, -1, &status);
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callerPattern = RegexPattern::compile(&patternText, 0, pe, status);
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (status != expectedStatus) {
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_errorName(status));
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status != U_ZERO_ERROR) {
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (pe.line != errLine || pe.offset != errCol) {
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: incorrect line/offset from UParseError.  Expected %d/%d; got %d/%d.\n",
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    line, errLine, errCol, pe.line, pe.offset);
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete callerPattern;
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&patternText);
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      Basic      Check for basic functionality of regex pattern matching.
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                 Avoid the use of REGEX_FIND test macro, which has
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                 substantial dependencies on basic Regex functionality.
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Basic() {
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Debug - slide failing test cases early
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError pe;
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode  status = U_ZERO_ERROR;
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pattern;
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPatternDump(pattern);
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool result = m->find();
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("result = %d\n", result);
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX====================");
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    exit(1);
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Pattern with parentheses
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE,  FALSE);
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)ring", "stabcring",       TRUE,  TRUE);
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)ring", "stabcrung",       FALSE, FALSE);
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Patterns with *
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a*", "",  TRUE, TRUE);
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a*", "b", TRUE, FALSE);
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Patterns with "."
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".", "abc", TRUE, FALSE);
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("...", "abc", TRUE, TRUE);
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("....", "abc", FALSE, FALSE);
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Patterns with * applied to chars at end of literal string
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Supplemental chars match as single chars, not a pair of surrogates.
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  UnicodeSets in the pattern
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurences.
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   OR operator in patterns
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a|b", "b", TRUE, TRUE);
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  +
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("b+", "", FALSE, FALSE);
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   ?
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Escape sequences that become single literal chars, handled internally
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   by ICU's Unescape.
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // REGEX_TESTLM("\101\142", "Ab", TRUE, TRUE);      // Octal     TODO: not implemented yet.
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\a", "\\u0007", TRUE, TRUE);        // BEL
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\cL", "\\u000c", TRUE, TRUE);       // Control-L
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\e", "\\u001b", TRUE, TRUE);        // Escape
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\f", "\\u000c", TRUE, TRUE);        // Form Feed
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\n", "\\u000a", TRUE, TRUE);        // new line
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\r", "\\u000d", TRUE, TRUE);        //  CR
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\t", "\\u0009", TRUE, TRUE);        // Tab
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\u1234", "\\u1234", TRUE, TRUE);
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\U00001234", "\\u1234", TRUE, TRUE);
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".*\\Ax", "xyz", TRUE, FALSE);  //  \A matches only at the beginning of input
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM(".*\\Ax", " xyz", FALSE, FALSE);  //  \A matches only at the beginning of input
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Escape of special chars in patterns
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_TESTLM("\\\\\\|\\(\\)\\[\\{\\~\\$\\*\\+\\?\\.", "\\\\|()[{~$*+?.", TRUE, TRUE);
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    UTextBasic   Check for quirks that are specific to the UText
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                 implementation.
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::UTextBasic() {
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText pattern = UTEXT_INITIALIZER;
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&pattern, str_abc, -1, &status);
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher matcher(&pattern, 0, status);
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText input = UTEXT_INITIALIZER;
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&input, str_abc, -1, &status);
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher.reset(&input);
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher.reset(matcher.inputText());
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&pattern);
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&input);
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Match   Test that the API for class RegexMatcher
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  is present and nominally working, but excluding functions
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  implementing replace operations.
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Match() {
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status=U_ZERO_ERROR;
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             flags = 0;
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Debug - slide failing test cases early
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Simple pattern compilation
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString       re("abc");
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern        *pat2;
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pat2 = RegexPattern::compile(re, flags, pe, status);
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString inStr1 = "abcdef this is a test";
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString instr2 = "not abc";
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString empty  = "";
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matcher creation and reset.
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *m1 = pat2->matcher(inStr1, status);
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->input() == inStr1);
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(instr2);
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->input() == instr2);
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(inStr1);
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->input() == inStr1);
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(empty);
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->input() == empty);
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m1->pattern() == pat2);
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  reset(pos, status)
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(inStr1);
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(4, status);
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->input() == inStr1);
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(-1, status);
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(0, status);
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t len = m1->input().length();
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(len-1, status);
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(len, status);
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(len+1, status);
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // match(pos, status)
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(instr2);
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(4, status) == TRUE);
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset();
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(3, status) == FALSE);
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset();
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(5, status) == FALSE);
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(4, status) == TRUE);
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match() at end of string should fail, but should not
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  be an error.
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        len = m1->input().length();
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(len, status) == FALSE);
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match beyond end of string should fail with an error.
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(len+1, status) == FALSE);
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Successful match at end of string.
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            RegexMatcher m("A?", 0, status);  // will match zero length string.
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            m.reset(inStr1);
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len = inStr1.length();
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.matches(len, status) == TRUE);
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            m.reset(empty);
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.matches(0, status) == TRUE);
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // lookingAt(pos, status)
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(instr2);  // "not abc"
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        len = m1->input().length();
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(len, status) == FALSE);
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(len+1, status) == FALSE);
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete m1;
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat2;
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Capture Group.
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::start();
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::end();
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::groupCount();
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString       re("01(23(45)67)(.*)");
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString data = "0123456789";
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = pat->matcher(data, status);
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        static const int32_t matchStarts[] = {0,  2, 4, 8};
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        static const int32_t matchEnds[]   = {10, 8, 6, 10};
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t i;
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; i<4; i++) {
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t actualStart = matcher->start(i, status);
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (actualStart != matchStarts[i]) {
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("RegexTest failure at line %d, index %d.  Expected %d, got %d\n",
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    __LINE__, i, matchStarts[i], actualStart);
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t actualEnd = matcher->end(i, status);
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (actualEnd != matchEnds[i]) {
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("RegexTest failure at line %d index %d.  Expected %d, got %d\n",
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    __LINE__, i, matchEnds[i], actualEnd);
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->lookingAt(status);
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->group(status)    == "0123456789");
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->group(0, status) == "0123456789");
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->group(1, status) == "234567"    );
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->group(2, status) == "45"        );
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->group(3, status) == "89"        );
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  find
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString       re("abc");
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString data = ".abc..abc...abc..";
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                    012345678901234567
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = pat->matcher(data, status);
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 6);
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 12);
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find() == FALSE);
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find() == FALSE);
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(0, status));
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(1, status));
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(2, status));
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 6);
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(12, status));
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 12);
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(13, status) == FALSE);
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(16, status) == FALSE);
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(17, status) == FALSE);
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->groupCount() == 0);
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  find, with \G in pattern (true if at the end of a previous match).
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString       re(".*?(?:(\\Gabc)|(abc))", -1, US_INV);
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString data = ".abcabc.abc..";
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                    012345678901234567
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = pat->matcher(data, status);
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 0);
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(1, status) == -1);
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(2, status) == 1);
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 4);
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(1, status) == 4);
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(2, status) == -1);
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   find with zero length matches, match position should bump ahead
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     to prevent loops.
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t                 i;
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher        m("(?= ?)", 0, status);   // This pattern will produce zero-length matches anywhere,
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                      //   using an always-true look-ahead.
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s("    ");
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(s);
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i++) {
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == i);
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==5);
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Check that the bump goes over surrogate pairs OK
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004");
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = s.unescape();
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(s);
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i+=2) {
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == i);
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==10);
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // find() loop breaking test.
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        with pattern of /.?/, should see a series of one char matches, then a single
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        match of zero length at the end of the input string.
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t                 i;
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher        m(".?", 0, status);
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s("    ");
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(s);
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i++) {
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==5);
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Matchers with no input string behave as if they had an empty input string.
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  m(".?", 0, status);
11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.find());
11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.start(status) == 0);
11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.input() == "");
11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *p = RegexPattern::compile(".", 0, status);
11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  *m = p->matcher(status);
11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m->find() == FALSE);
11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m->input() == "");
11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete m;
11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete p;
11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Regions
11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString("This is test data");
11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m(".*", testString,  0, status);
11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);
11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == testString.length());
11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.region(2,4, status);
11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.matches(status));
11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.start(status)==2);
11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.end(status)==4);
11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset();
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);
11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == testString.length());
11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString shorterString("short");
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(shorterString);
11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);
11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == shorterString.length());
11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // hitEnd() and requireEnd()
11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString("aabb");
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m1(".*", testString,  0, status);
11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.hitEnd() == TRUE);
11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.requireEnd() == FALSE);
11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m2("a*", testString, 0, status);
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.hitEnd() == FALSE);
11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.requireEnd() == FALSE);
11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m3(".*$", testString, 0, status);
11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.hitEnd() == TRUE);
11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.requireEnd() == TRUE);
11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compilation error on reset with UChar *
12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   These were a hazard that people were stumbling over with runtime errors.
12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Changed them to compiler errors by adding private methods that more closely
12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   matched the incorrect use of the functions.
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar ucharString[20];
12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m(".", 0, status);
12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(ucharString);  // should not compile.
12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *p = RegexPattern::compile(".", 0, status);
12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *m2 = p->matcher(ucharString, status);    //  should not compile.
12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m3(".", ucharString, 0, status);  //  Should not compile
12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Time Outs.
12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       Note:  These tests will need to be changed when the regexp engine is
12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //              able to detect and cut short the exponential time behavior on
12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //              this type of match.
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    Enough 'a's in the string to cause the match to time out.
12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       (Each additional 'a' doubles the time)
12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");
12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("(a+)+b", testString, 0, status);
12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getTimeLimit() == 0);
12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setTimeLimit(100, status);
12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getTimeLimit() == 100);
12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_REGEX_TIME_OUT);
12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Few enough 'a's to slip in under the time limit.
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString("aaaaaaaaaaaaaaaaaa");
12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("(a+)+b", testString, 0, status);
12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setTimeLimit(100, status);
12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Stack Limits
12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString(1000000, 0x41, 1000000);  // Length 1,000,000, filled with 'A'
12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   of the '+', and makes the stack frames larger.
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("(A)+A$", testString, 0, status);
12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // With the default stack, this match should fail to run
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // With unlimited stack, it should run
12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setStackLimit(0, status);
12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.lookingAt(status) == TRUE);
12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getStackLimit() == 0);
12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // With a limited stack, the match should fail
12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setStackLimit(10000, status);
12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getStackLimit() == 10000);
12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A pattern that doesn't save state should work with
12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   a minimal sized stack
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testString = "abc";
12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("abc", testString, 0, status);
12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setStackLimit(30, status);
12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.matches(status) == TRUE);
12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getStackLimit() == 30);
12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Negative stack sizes should fail
12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setStackLimit(1000, status);
12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setStackLimit(-1, status);
12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.getStackLimit() == 1000);
13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Replace        API test for class RegexMatcher, testing the
13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         Replace family of functions.
13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Replace() {
13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Replace
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             flags=0;
13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status=U_ZERO_ERROR;
13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString       re("abc");
13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString data = ".abc..abc...abc..";
13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                    012345678901234567
13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *matcher = pat->matcher(data, status);
13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Plain vanilla matches.
13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString  dest;
13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceFirst("yz", status);
13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == ".yz..abc...abc..");
13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceAll("yz", status);
13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == ".yz..yz...yz..");
13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Plain vanilla non-matches.
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString d2 = ".abx..abx...abx..";
13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(d2);
13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceFirst("yz", status);
13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == ".abx..abx...abx..");
13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceAll("yz", status);
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == ".abx..abx...abx..");
13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Empty source string
13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString d3 = "";
13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(d3);
13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceFirst("yz", status);
13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "");
13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceAll("yz", status);
13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "");
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Empty substitution string
13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(data);              // ".abc..abc...abc.."
13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceFirst("", status);
13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "...abc...abc..");
13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceAll("", status);
13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "........");
13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // match whole string
13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString d4 = "abc";
13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(d4);
13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceFirst("xyz", status);
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "xyz");
13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher->replaceAll("xyz", status);
13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "xyz");
13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Capture Group, simple case
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString       re2("a(..)");
13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString d5 = "abcdefg";
14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *matcher2 = pat2->matcher(d5, status);
14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher2->replaceFirst("$1$1", status);
14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "bcbcdefg");
14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status);
14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "The value of $1 is bc.defg");
14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);
14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    replacement = replacement.unescape();
14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = matcher2->replaceFirst(replacement, status);
14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg");
14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",status), U_INDEX_OUTOFBOUNDS_ERROR);
14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Replacement String with \u hex escapes
14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  src = "abc 1 abc 2 abc 3";
14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\u0043--");
14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset(src);
14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  result = matcher->replaceAll(substitute, status);
14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == "--C-- 1 --C-- 2 --C-- 3");
14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  src = "abc !";
14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  substitute = UNICODE_STRING_SIMPLE("--\\U00010000--");
14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset(src);
14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString  result = matcher->replaceAll(substitute, status);
14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString expected = UnicodeString("--");
14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected.append((UChar32)0x10000);
14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected.append("-- !");
14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == expected);
14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // TODO:  need more thorough testing of capture substitutions.
14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Bug 4057
14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s = "The matches start with ss and end with ee ss stuff ee fin";
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m("ss(.*?)ee", 0, status);
14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString result;
14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Multiple finds do NOT bump up the previous appendReplacement position.
14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(s);
14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(result, "ooh", status);
14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // After a reset into the interior of a string, appendReplacement still starts at beginning.
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.truncate(0);
14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(10, status);
14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(result, "ooh", status);
14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // find() at interior of string, appendReplacement still starts at beginning.
14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.truncate(0);
14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset();
14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find(10, status);
14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(result, "ooh", status);
14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh");
14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendTail(result);
14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == "The matches start with ss and end with ee ooh fin");
14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete matcher2;
14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat2;
14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete matcher;
14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat;
14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Pattern       Test that the API for class RegexPattern is
15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                        present and nominally working.
15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Pattern() {
15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        pata;    // Test default constructor to not crash.
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        patb;
15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pata == patb);
15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pata == pata);
15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString re1("abc[a-l][m-z]");
15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString re2("def");
15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode    status = U_ZERO_ERROR;
15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError   pe;
15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat1 = RegexPattern::compile(re1, 0, pe, status);
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat2 = RegexPattern::compile(re2, 0, pe, status);
15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1 == *pat1);
15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1 != pata);
15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Assign
15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patb = *pat1;
15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == *pat1);
15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Copy Construct
15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern patc(*pat1);
15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patc == *pat1);
15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == patc);
15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1 != pat2);
15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patb = *pat2;
15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb != patc);
15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == *pat2);
15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compile with no flags.
15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern         *pat1a = RegexPattern::compile(re1, pe, status);
15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1a == *pat1);
15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1a->flags() == 0);
15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compile with different flags should be not equal
15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);
15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1b != *pat1a);
15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1a->flags() == 0);
15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1b;
15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // clone
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat1c = pat1->clone();
15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1c == *pat1);
15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1c != *pat2);
15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1c;
15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1a;
15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat2;
15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Verify that a matcher created from a cloned pattern works.
15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     (Jitterbug 3423)
15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode     status     = U_ZERO_ERROR;
15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *pSource    = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *pClone     = pSource->clone();
15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete         pSource;
15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  *mFromClone = pClone->matcher(status);
15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s = "Hello World";
15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mFromClone->reset(s);
15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == TRUE);
15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->group(status) == "Hello");
15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == TRUE);
15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->group(status) == "World");
15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == FALSE);
15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete mFromClone;
15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pClone;
15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   matches convenience API
15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);
15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_INDEX_OUTOFBOUNDS_ERROR;
15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Split()
16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(" +",  pe, status);
16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString  fields[10];
16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t n;
16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 10, status);
16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now");
16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="is");
16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="the");
16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="time");
16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="");
16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 2, status);
16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==2);
16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now");
16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="is the time");
16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[1] = "*";
16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 1, status);
16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==1);
16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now is the time");
16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="*");
16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("    Now       is the time   ", fields, 10, status);
16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==6);
16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="Now");
16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="is");
16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="the");
16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="time");
16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="");
16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("     ", fields, 10, status);
16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==2);
16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="");
16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[0] = "foo";
16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("", fields, 10, status);
16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==0);
16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="foo");
16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  split, with a pattern with (capture)
16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"),  pe, status);
16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==7);
16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="c");
16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="");
16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(status==U_ZERO_ERROR);
16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==7);
16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="c");
16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="");
16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[6] = "foo";
16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 6, status);
16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==6);
16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="");  // All text following "<c>" field delimiter.
16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="foo");
16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[5] = "foo";
17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time<c>");
17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="foo");
17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[5] = "foo";
17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="foo");
17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="the time<c>");
17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile("([-,])",  pe, status);
17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("1-10,20", fields, 10, status);
17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="1");
17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="-");
17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="10");
17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]==",");
17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="20");
17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Test split of string with empty trailing fields
17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(",", pe, status);
17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("a,b,c,", fields, 10, status);
17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="a");
17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="b");
17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="c");
17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="");
17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("a,,,", fields, 10, status);
17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="a");
17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="");
17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="");
17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="");
17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Split Separator with zero length match.
17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(":?", pe, status);
17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("abc", fields, 10, status);
17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="b");
17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="c");
17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="");
17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // RegexPattern::pattern()
17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = new RegexPattern();
17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->pattern() == "");
17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->pattern() == "(Hello, world)*");
17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // classID functions
17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile("(Hello, world)*",  pe, status);
17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->getDynamicClassID() == RegexPattern::getStaticClassID());
17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->getDynamicClassID() != NULL);
18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString Hello("Hello, world.");
18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *m = pat1->matcher(Hello, status);
18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->getDynamicClassID() != m->getDynamicClassID());
18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(m->getDynamicClassID() == RegexMatcher::getStaticClassID());
18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(m->getDynamicClassID() != NULL);
18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete m;
18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Match_UTF8   Test that the alternate engine for class RegexMatcher
18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       is present and working, but excluding functions
18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       implementing replace operations.
18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Match_UTF8() {
18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status=U_ZERO_ERROR;
18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             flags = 0;
18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Debug - slide failing test cases early
18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Simple pattern compilation
18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText               re = UTEXT_INITIALIZER;
18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&re);
18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern        *pat2;
18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pat2 = RegexPattern::compile(&re, flags, pe, status);
18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText input1 = UTEXT_INITIALIZER;
18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText input2 = UTEXT_INITIALIZER;
18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText empty  = UTEXT_INITIALIZER;
18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status);
18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&input1);
18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&input2);
18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUChars(&empty, NULL, 0, &status);
18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t input2Len = strlen("not abc");
18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matcher creation and reset.
18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);
18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */
18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&input2);
18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */
18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText());
18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&input1);
18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&empty);
18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(utext_nativeLength(&empty) == 0);
18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  reset(pos, status)
18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&input1);
18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(4, status);
18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText());
18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(-1, status);
18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(0, status);
18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(input1Len-1, status);
18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(input1Len, status);
18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(input1Len+1, status);
19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // matches(pos, status)
19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&input2);
19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(4, status) == TRUE);
19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset();
19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(3, status) == FALSE);
19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset();
19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(5, status) == FALSE);
19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(4, status) == TRUE);
19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(-1, status) == FALSE);
19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match() at end of string should fail, but should not
19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  be an error.
19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(input2Len, status) == FALSE);
19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match beyond end of string should fail with an error.
19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->matches(input2Len+1, status) == FALSE);
19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Successful match at end of string.
19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            RegexMatcher m("A?", 0, status);  // will match zero length string.
19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            m.reset(&input1);
19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.matches(input1Len, status) == TRUE);
19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            m.reset(&empty);
19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.matches(0, status) == TRUE);
19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // lookingAt(pos, status)
19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m1->reset(&input2);  // "not abc"
19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(5, status) == FALSE);
19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(3, status) == FALSE);
19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(4, status) == TRUE);
19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);
19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);
19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete m1;
19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat2;
19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&re);
19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input1);
19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input2);
19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&empty);
19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Capture Group.
19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::start();
19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::end();
19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     RegexMatcher::groupCount();
19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText               re=UTEXT_INITIALIZER;
19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&re, str_01234567_pat, -1, &status);
19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText input = UTEXT_INITIALIZER;
19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_0123456789, -1, &status);
19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        static const int32_t matchStarts[] = {0,  2, 4, 8};
19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        static const int32_t matchEnds[]   = {10, 8, 6, 10};
19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t i;
19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; i<4; i++) {
19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t actualStart = matcher->start(i, status);
19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (actualStart != matchStarts[i]) {
19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("RegexTest failure at %s:%d, index %d.  Expected %d, got %d\n",
20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      __FILE__, __LINE__, i, matchStarts[i], actualStart);
20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t actualEnd = matcher->end(i, status);
20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS;
20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (actualEnd != matchEnds[i]) {
20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("RegexTest failure at %s:%d index %d.  Expected %d, got %d\n",
20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      __FILE__, __LINE__, i, matchEnds[i], actualEnd);
20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();
20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->lookingAt(status);
20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString dest;
20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText destText = UTEXT_INITIALIZER;
20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&destText, &dest, &status);
20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText *result;
20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //	Test shallow-clone API
20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int64_t   group_len;
20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group((UText *)NULL, group_len, status);
20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(0, &destText, group_len, status);
20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  destText is now immutable, reopen it
20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&destText);
20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&destText, &dest, &status);
20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(0, NULL, status);
20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(0, &destText, status);
20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(1, NULL, status);
20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(1, &destText, status);
20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(2, NULL, status);
20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(2, &destText, status);
20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(3, NULL, status);
20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->group(3, &destText, status);
20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();
20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&destText);
20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input);
20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&re);
20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  find: search for "abc" in the UTF-8 input ".abc..abc...abc..", first with repeated
20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //        find() calls, then with find(startIndex, status) including its out-of-range errors.
20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText               re=UTEXT_INITIALIZER;
20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* the pattern: abc, as UTF-8 bytes */
21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&re, str_abc, -1, &status);
21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText input = UTEXT_INITIALIZER;
21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* the input: .abc..abc...abc.. (17 bytes) */
21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_abcabcabc, -1, &status);
21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  offsets into input: 012345678901234567   (abc starts at 1, 6 and 12)
21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);   // attach the input; matcher is deleted at the end of this block
21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 6);
21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 12);
21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find() == FALSE);   // there is no fourth occurrence
21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find() == FALSE);   // a further call must still report no match
21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset();   // after reset(), find() is expected to start again from the first occurrence
21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(0, status));   // find(startIndex, status): search from the given index
21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(1, status));
21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 1);
21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(2, status));   // index 2 is inside the first occurrence, so the one at 6 is found
21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 6);
21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(12, status));
21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 12);
21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(13, status) == FALSE);   // past the start of the last occurrence
21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(16, status) == FALSE);
21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find(17, status) == FALSE);   // 17 is the input length: no match, but not an error here
21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);   // the preceding find failed, so start() has no match to report
21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);   // negative start index
21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);   // start index beyond the input length (17)
21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->groupCount() == 0);   // the pattern contains no capture groups
21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input);
21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&re);
21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  find, with \G in pattern (true if at the end of a previous match).
21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Group 1 is the \G alternative and group 2 the plain one; the test expects start(n) == -1 for the alternative not taken.
21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t             flags=0;
21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError         pe;
21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText               re=UTEXT_INITIALIZER;
21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc))  shown C-escaped; the bytes hold a single backslash (0x5c) before G */
21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&re, str_Gabcabc, -1, &status);
21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText input = UTEXT_INITIALIZER;
21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* the input: .abcabc.abc.. (13 bytes) */
21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_abcabcabc, -1, &status);
21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  offsets into input: 0123456789012
21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 0);      // the leading lazy .*? takes the '.' at offset 0
21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(1, status) == -1);  // first match: the \G alternative is not the one that matched
21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(2, status) == 1);
21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->find());
21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(status) == 4);      // the second match begins where the first one ended
21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(1, status) == 4);   // so this time the \G alternative matches
21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher->start(2, status) == -1);
21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete matcher;
21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat;
21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input);
21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&re);
21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   find with zero length matches, match position should bump ahead
21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     to prevent loops.  Checked on four spaces, then on four 4-byte (non-BMP) UTF-8 characters.
21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t                 i;
21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher        m("(?= ?)", 0, status);   // This pattern produces zero-length matches anywhere,
21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                      //   using an always-true look-ahead.
22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText s = UTEXT_INITIALIZER;
22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&s, "    ", -1, &status);
22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(&s);
22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i++) {   // i is the position the next empty match is expected at
22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == i);
22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==5);   // four characters give five empty matches, at positions 0 through 4
22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Check that the bump goes over characters outside the BMP OK
22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // "\\U00010001\\U00010002\\U00010003\\U00010004".unescape()...in UTF-8
22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        unsigned char aboveBMP[] = {0xF0, 0x90, 0x80, 0x81, 0xF0, 0x90, 0x80, 0x82, 0xF0, 0x90, 0x80, 0x83, 0xF0, 0x90, 0x80, 0x84, 0x00};
22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&s, (char *)aboveBMP, -1, &status);   // re-open the same UText over the new buffer
22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(&s);
22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i+=4) {   // each character occupies 4 bytes, so expected match positions step by 4
22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == i);
22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==20);   // five empty matches (0, 4, 8, 12, 16); the loop increment then leaves i at 20
22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&s);
22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // find() loop breaking test.
22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        With a pattern of /.?/ we should see a series of one-character matches, then a single
22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        match of zero length at the end of the input string, after which find() must fail.
22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t                 i;
22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode          status=U_ZERO_ERROR;
22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher        m(".?", 0, status);
22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText s = UTEXT_INITIALIZER;
22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&s, "    ", -1, &status);
22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(&s);
22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; ; i++) {
22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (m.find() == FALSE) {
22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.start(status) == i);
22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));   // matches 0..3 are one character long; the match at 4 is empty
22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(i==5);   // four one-character matches plus the one empty match at the end
22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&s);
22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Matchers with no input string behave as if they had an empty input string.
22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Here: a matcher constructed from a pattern string only, never given any input.
22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  m(".?", 0, status);
22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.find());                 // the optional character lets the pattern match empty input
22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.start(status) == 0);
22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.input() == "");          // input() is expected to compare equal to an empty string
22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {   // No-input check again, this time for a matcher obtained from RegexPattern::matcher(status).
22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *p = RegexPattern::compile(".", 0, status);
22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  *m = p->matcher(status);
22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m->find() == FALSE);                          // this pattern needs one character, and there is none
22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);     // the UText view of the missing input has length 0
22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete m;
22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete p;
22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Regions: the default region, region(start, limit, status), and how reset() affects the region
22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //          and the anchoring / transparent bounds settings.
22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText testPattern = UTEXT_INITIALIZER;
22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText testText    = UTEXT_INITIALIZER;
22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);
22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&testPattern);
22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&testText);
22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m(&testPattern, &testText, 0, status);
22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);   // a fresh matcher's region is expected to span the whole input
22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);   // expected defaults: non-transparent, anchoring bounds
22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.region(2,4, status);   // narrow the region to offsets 2..4
22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.matches(status));   // the match is expected to cover exactly the region
22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.start(status)==2);
23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.end(status)==4);
23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset();   // reset() is expected to put the region back to the whole input
23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);
23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&testText, "short", -1, &status);   // re-open testText over a shorter string
23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_VERBOSE_TEXT(&testText);
23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(&testText);   // the region is expected to follow the new input's length
23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionStart() == 0);
23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));   // the setter is expected to return the matcher itself
23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());                     // reset() must leave the anchoring setting unchanged
23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());                     // likewise, reset() must keep the transparent-bounds setting
23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(&m == &m.reset());
23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&testText);
23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&testPattern);
23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // hitEnd() and requireEnd(), checked after lookingAt() on the input aabb with three different patterns.
23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText testPattern = UTEXT_INITIALIZER;
23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText testText    = UTEXT_INITIALIZER;
23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */
23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&testPattern, str_, -1, &status);
23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&testText, str_aabb, -1, &status);
23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m1(&testPattern, &testText,  0, status);
23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.lookingAt(status) == TRUE);
23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.hitEnd() == TRUE);         // dot-star consumes the input all the way to its end
23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m1.requireEnd() == FALSE);
23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&testPattern, str_a, -1, &status);   // re-open testPattern over the next pattern
23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m2(&testPattern, &testText, 0, status);
23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.lookingAt(status) == TRUE);
23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.hitEnd() == FALSE);        // this pattern stops at the first b, short of the end
23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m2.requireEnd() == FALSE);
23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */
23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);
23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m3(&testPattern, &testText, 0, status);
23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.lookingAt(status) == TRUE);
23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.hitEnd() == TRUE);
23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(m3.requireEnd() == TRUE);     // with the trailing $ the match depends on being at the end of input
23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&testText);
23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&testPattern);
23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Replace_UTF8   API test for class RegexMatcher, testing the
23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         Replace family of functions.
23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Replace_UTF8() {
23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Replace
23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             flags=0;
23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status=U_ZERO_ERROR;
23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText               re=UTEXT_INITIALIZER;
23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_VERBOSE_TEXT(&re);
23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //             012345678901234567
24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText dataText = UTEXT_INITIALIZER;
24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, data, -1, &status);
24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_VERBOSE_TEXT(&dataText);
24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);
24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Plain vanilla matches.
24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString  dest;
24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText destText = UTEXT_INITIALIZER;
24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&destText, &dest, &status);
24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText *result;
24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText replText = UTEXT_INITIALIZER;
24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_yz, -1, &status);
24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_VERBOSE_TEXT(&replText);
24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, NULL, status);
24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */
24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, &destText, status);
24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, NULL, status);
24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */
24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, &destText, status);
24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Plain vanilla non-matches.
24466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
24486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
24496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(&dataText);
24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, NULL, status);
24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, &destText, status);
24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, NULL, status);
24616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
24636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
24656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, &destText, status);
24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Empty source string
24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, NULL, 0, &status);
24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(&dataText);
24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, NULL, status);
24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8("", result);
24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, &destText, status);
24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8("", result);
24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, NULL, status);
24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8("", result);
24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, &destText, status);
24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8("", result);
24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Empty substitution string
24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(&dataText);
24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, NULL, 0, &status);
25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, NULL, status);
25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */
25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, &destText, status);
25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, NULL, status);
25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */
25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
25156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, &destText, status);
25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_dots, result);
25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // match whole string
25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, str_abc, -1, &status);
25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->reset(&dataText);
25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */
25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_xyz, -1, &status);
25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, NULL, status);
25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceFirst(&replText, &destText, status);
25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, NULL, status);
25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher->replaceAll(&replText, &destText, status);
25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_xyz, result);
25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Capture Group, simple case
25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */
25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&re, str_add, -1, &status);
25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);
25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */
25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&dataText, str_abcdefg, -1, &status);
25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_11, -1, &status);
25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, NULL, status);
25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* bcbcdefg */
25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, &destText, status);
25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_v, -1, &status);
25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_VERBOSE_TEXT(&replText);
25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, NULL, status);
25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */
25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, &destText, status);
25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, NULL, status);
25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */
25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, &destText, status);
26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */
26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE
26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                                 012345678901234567890123456
26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    supplDigitChars[22] = 0xF0;
26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    supplDigitChars[23] = 0x9D;
26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    supplDigitChars[24] = 0x9F;
26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    supplDigitChars[25] = 0x8F;
26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, NULL, status);
26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = matcher2->replaceFirst(&replText, &destText, status);
26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e,  0x00 }; /* bad capture group number $5..." */
26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);
26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR);
26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(result);
26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR);
26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == &destText);
26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Replacement String with \u hex escapes
26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */
26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */
26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&replText, str_u0043, -1, &status);
26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset(&dataText);
26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->replaceAll(&replText, NULL, status);
26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->replaceAll(&replText, &destText, status);
26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */
26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&dataText, str_abc, -1, &status);
26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */
26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&replText, str_U00010000, -1, &status);
26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->reset(&dataText);
26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                          0123456789
26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected[2] = 0xF0;
26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected[3] = 0x90;
26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected[4] = 0x80;
26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expected[5] = 0x80;
26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->replaceAll(&replText, NULL, status);
26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(result);
26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);
26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = matcher->replaceAll(&replText, &destText, status);
26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &destText);
26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // TODO:  need more thorough testing of capture substitutions.
26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Bug 4057
26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */
26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */
26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&re, str_ssee, -1, &status);
26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&dataText, str_blah, -1, &status);
26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&replText, str_ooh, -1, &status);
26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher m(&re, 0, status);
26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString result;
26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText resultText = UTEXT_INITIALIZER;
26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&resultText, &result, &status);
26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Multiple finds do NOT bump up the previous appendReplacement position.
26996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(&dataText);
27006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(&resultText, &replText, status);
27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText);
27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // After a reset into the interior of a string, appendReplacement still starts at beginning.
27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.truncate(0);
27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&resultText, &result, &status);
27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset(10, status);
27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(&resultText, &replText, status);
27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText);
27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // find() at interior of string, appendReplacement still starts at beginning.
27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.truncate(0);
27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&resultText, &result, &status);
27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.reset();
27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find(10, status);
27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.find();
27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendReplacement(&resultText, &replText, status);
27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);
27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        m.appendTail(&resultText, status);
27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&resultText);
27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete matcher2;
27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat2;
27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete matcher;
27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat;
27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&dataText);
27446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&replText);
27456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&destText);
27466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&re);
27476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
27486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      API_Pattern_UTF8  Test that the API for class RegexPattern is
27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                        present and nominally working.
27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::API_Pattern_UTF8() {
27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        pata;    // Test default constructor to not crash.
27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        patb;
27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pata == patb);
27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pata == pata);
27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText         re1 = UTEXT_INITIALIZER;
27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText         re2 = UTEXT_INITIALIZER;
27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode    status = U_ZERO_ERROR;
27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError   pe;
27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&re1, str_abcalmz, -1, &status);
27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&re2, str_def, -1, &status);
27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat1 = RegexPattern::compile(&re1, 0, pe, status);
27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat2 = RegexPattern::compile(&re2, 0, pe, status);
27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1 == *pat1);
27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1 != pata);
27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Assign
27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patb = *pat1;
27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == *pat1);
27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Copy Construct
27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern patc(*pat1);
27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patc == *pat1);
27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == patc);
27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1 != pat2);
27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patb = *pat2;
27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb != patc);
27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(patb == *pat2);
27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compile with no flags.
27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern         *pat1a = RegexPattern::compile(&re1, pe, status);
27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1a == *pat1);
27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1a->flags() == 0);
27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compile with different flags should be not equal
27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *pat1b = RegexPattern::compile(&re1, UREGEX_CASE_INSENSITIVE, pe, status);
28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1b != *pat1a);
28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1a->flags() == 0);
28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1b;
28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // clone
28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *pat1c = pat1->clone();
28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1c == *pat1);
28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(*pat1c != *pat2);
28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1c;
28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1a;
28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat2;
28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&re1);
28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&re2);
28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Verify that a matcher created from a cloned pattern works.
28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     (Jitterbug 3423)
28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode     status     = U_ZERO_ERROR;
28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText          pattern    = UTEXT_INITIALIZER;
28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_pL, -1, &status);
28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *pSource    = RegexPattern::compile(&pattern, 0, status);
28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern  *pClone     = pSource->clone();
28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete         pSource;
28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher  *mFromClone = pClone->matcher(status);
28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText          input      = UTEXT_INITIALIZER;
28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_HelloWorld, -1, &status);
28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mFromClone->reset(&input);
28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == TRUE);
28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->group(status) == "Hello");
28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == TRUE);
28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->group(status) == "World");
28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(mFromClone->find() == FALSE);
28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete mFromClone;
28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pClone;
28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input);
28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&pattern);
28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   matches convenience API
28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status  = U_ZERO_ERROR;
28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText      pattern = UTEXT_INITIALIZER;
28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText      input   = UTEXT_INITIALIZER;
28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_randominput, -1, &status);
28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */
28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_dotstar, -1, &status);
28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_abc, -1, &status);
28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_nput, -1, &status);
28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_randominput, -1, &status);
28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */
28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_u, -1, &status);
28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&input, str_abc, -1, &status);
28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&pattern, str_abc, -1, &status);
28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&input);
28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&pattern);
28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Split()
29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /*  + */
29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&re1, str_spaceplus, -1, &status);
29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(&re1, pe, status);
29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString  fields[10];
29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t n;
29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 10, status);
29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now");
29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="is");
29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="the");
29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="time");
29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="");
29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 2, status);
29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==2);
29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now");
29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="is the time");
29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="the");   // left over from previous test
29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[1] = "*";
29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("Now is the time", fields, 1, status);
29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==1);
29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="Now is the time");
29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="*");
29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("    Now       is the time   ", fields, 10, status);
29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==6);
29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="Now");
29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="is");
29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="the");
29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="time");
29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="");
29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="");
29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[2] = "*";
29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("     ", fields, 10, status);
29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==2);
29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="");
29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="*");
29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[0] = "foo";
29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("", fields, 10, status);
29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==0);
29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="foo");
29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  split, with a pattern with (capture)
29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status);
29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(&re1,  pe, status);
29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[6] = fields[7] = "*";
29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==7);
29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="");
29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="c");
29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="");
29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[7]=="*");
29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(status==U_ZERO_ERROR);
29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[6] = fields[7] = "*";
29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==7);
29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="c");
29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="");
29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[7]=="*");
29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[6] = "foo";
29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c> ", fields, 6, status);
29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==6);
30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]==" ");
30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[6]=="foo");
30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[5] = "foo";
30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 5, status);
30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time<c>");
30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="foo");
30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fields[5] = "foo";
30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time", fields, 5, status);
30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="b");
30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="the time");
30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[5]=="foo");
30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==4);
30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="  ");
30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="a");
30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="Now is ");
30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]=="the time<c>");
30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status);
30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(&re1, pe, status);
30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    n = pat1->split("1-10,20", fields, 10, status);
30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(n==5);
30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[0]=="1");
30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[1]=="-");
30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[2]=="10");
30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[3]==",");
30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(fields[4]=="20");
30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // RegexPattern::pattern() and patternText()
30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = new RegexPattern();
30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(pat1->pattern() == "");
30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));
30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *helloWorldInvariant = "(Hello, world)*";
30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pat1 = RegexPattern::compile(&re1, pe, status);
30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pat1;
30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&re1);
30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//---------------------------------------------------------------------------
//
//      Extended       A more thorough check for features of regex patterns
//                     The test cases are in a separate data file,
//                       source/test/testdata/regextst.txt
//                     A description of the test data format is included in that file.
//
//---------------------------------------------------------------------------
30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst char *
30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexTest::getPath(char buffer[2048], const char *filename) {
30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status=U_ZERO_ERROR;
30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *testDataDirectory = IntlTest::getSourceTestData(status);
30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("ERROR: loadTestData() failed - %s", u_errorName(status));
30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    strcpy(buffer, testDataDirectory);
30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    strcat(buffer, filename);
30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return buffer;
30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Extended() {
31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char tdd[2048];
31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *srcPath;
31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode  status  = U_ZERO_ERROR;
31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     lineNum = 0;
31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Open and read the test data file.
31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    srcPath=getPath(tdd, "regextst.txt");
31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(srcPath==NULL) {
31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    len;
31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Put the test data into a UnicodeString
31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString testString(FALSE, testData, len);
31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher    quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher    flagsMat      (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMQvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString   testPattern;   // The pattern for test from the test file.
31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString   testFlags;     // the flags   for a test.
31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString   matchString;   // The marked up string to be used as input
31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)){
31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Construct RegexMatcher() error - %s", u_errorName(status));
31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete [] testData;
31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Loop over the test data file, once per line.
31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
31426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (lineMat.find()) {
31436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lineNum++;
31446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
31456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status));
31466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
31496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString testLine = lineMat.group(1, status);
31506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (testLine.length() == 0) {
31516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Parse the test line.  Skip blank and comment only lines.
31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Separate out the three main fields - pattern, flags, target.
31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        commentMat.reset(testLine);
31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (commentMat.lookingAt(status)) {
31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This line is a comment, or blank.
31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Pull out the pattern field, remove it from the test file line.
31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        quotedStuffMat.reset(testLine);
31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (quotedStuffMat.lookingAt(status)) {
31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            testPattern = quotedStuffMat.group(2, status);
31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            testLine.remove(0, quotedStuffMat.end(0, status));
31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum);
31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Pull out the flags from the test file line.
31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagsMat.reset(testLine);
31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagsMat.lookingAt(status);                  // Will always match, possibly an empty string.
31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        testFlags = flagsMat.group(1, status);
31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagsMat.group(2, status).length() > 0) {
31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Bad Match flag at line %d. Scanning %c\n",
31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                lineNum, flagsMat.group(2, status).charAt(0));
31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        testLine.remove(0, flagsMat.end(0, status));
31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Pull out the match string, as a whole.
31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    We'll process the <tags> later.
31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        quotedStuffMat.reset(testLine);
31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (quotedStuffMat.lookingAt(status)) {
31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            matchString = quotedStuffMat.group(2, status);
31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            testLine.remove(0, quotedStuffMat.end(0, status));
31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Bad match string at test file line %d", lineNum);
32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  The only thing left from the input line should be an optional trailing comment.
32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        commentMat.reset(testLine);
32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (commentMat.lookingAt(status) == FALSE) {
32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Line %d: unexpected characters at end of test line.", lineNum);
32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Run the test
32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regex_find(testPattern, testFlags, matchString, srcPath, lineNum);
32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] testData;
32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//---------------------------------------------------------------------------
//
//    regex_find(pattern, flags, inputString, srcPath, lineNumber)
//
//         Function to run a single test from the Extended (data driven) tests.
//         See file test/testdata/regextst.txt for a description of the
//         pattern and inputString fields, and the allowed flags.
//         srcPath is the path of the test data file, as passed in by Extended().
//         lineNumber is the source line in regextst.txt of the test.
//
//---------------------------------------------------------------------------
32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Set a value into a UVector at position specified by a decimal number in
32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   a UnicodeString.   This is a utility function needed by the actual test function,
32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   which follows.
32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void set(UVector &vec, int32_t val, UnicodeString index) {
32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode  status=U_ZERO_ERROR;
32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  idx = 0;
32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t i=0; i<index.length(); i++) {
32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t d=u_charDigitValue(index.charAt(i));
32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (d<0) {return;}
32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        idx = idx*10 + d;
32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (vec.size()<idx+1) {vec.addElement(-1, status);}
32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    vec.setElementAt(val, idx);
32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void setInt(UVector &vec, int32_t val, int32_t idx) {
32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode  status=U_ZERO_ERROR;
32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (vec.size()<idx+1) {vec.addElement(-1, status);}
32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    vec.setElementAt(val, idx);
32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool utextOffsetToNative(UText *utext, int32_t unistrOffset, int32_t& nativeIndex)
32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool couldFind = TRUE;
32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UTEXT_SETNATIVEINDEX(utext, 0);
32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i = 0;
32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (i < unistrOffset) {
32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c = UTEXT_NEXT32(utext);
32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c != U_SENTINEL) {
32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            i += U16_LENGTH(c);
32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            couldFind = FALSE;
32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nativeIndex = (int32_t)UTEXT_GETNATIVEINDEX(utext);
32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return couldFind;
32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::regex_find(const UnicodeString &pattern,
32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           const UnicodeString &flags,
32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           const UnicodeString &inputString,
32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           const char *srcPath,
32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           int32_t line) {
32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString       unEscapedInput;
32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString       deTaggedInput;
32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             patternUTF8Length,      inputUTF8Length;
32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char                *patternChars  = NULL, *inputChars = NULL;
32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText               patternText    = UTEXT_INITIALIZER;
32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText               inputText      = UTEXT_INITIALIZER;
32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter          *UTF8Converter = NULL;
32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode          status         = U_ZERO_ERROR;
32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError         pe;
32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *parsePat      = NULL;
32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher        *parseMatcher  = NULL;
32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern        *callerPattern = NULL, *UTF8Pattern = NULL;
32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher        *matcher       = NULL, *UTF8Matcher = NULL;
32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             groupStarts(status);
32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             groupEnds(status);
32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             groupStartsUTF8(status);
33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             groupEndsUTF8(status);
33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               isMatch        = FALSE, isUTF8Match = FALSE;
33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               failed         = FALSE;
33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             numFinds;
33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             i;
33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               useMatchesFunc   = FALSE;
33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool               useLookingAtFunc = FALSE;
33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             regionStart      = -1;
33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             regionEnd        = -1;
33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             regionStartUTF8  = -1;
33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t             regionEndUTF8    = -1;
33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Compile the caller's pattern
33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t bflags = 0;
33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x69) >= 0)  { // 'i' flag
33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_CASE_INSENSITIVE;
33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x78) >= 0)  { // 'x' flag
33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_COMMENTS;
33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x73) >= 0)  { // 's' flag
33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_DOTALL;
33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x6d) >= 0)  { // 'm' flag
33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_MULTILINE;
33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_UNIX_LINES;
33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag
33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bflags |= UREGEX_LITERAL;
33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callerPattern = RegexPattern::compile(pattern, bflags, pe, status);
33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (status != U_ZERO_ERROR) {
33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        #if UCONFIG_NO_BREAK_ITERATION==1
33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // 'v' test flag means that the test pattern should not compile if ICU was configured
33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto cleanupAndReturn;
33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        #endif
33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Expected pattern compilation error.
33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                logln("Pattern Compile returns \"%s\"", u_errorName(status));
33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto cleanupAndReturn;
33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Unexpected pattern compilation error.
33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(status));
33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto cleanupAndReturn;
33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UTF8Converter = ucnv_open("UTF8", &status);
33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR; // buffer overflow
33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    patternChars = new char[patternUTF8Length+1];
33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (status == U_ZERO_ERROR) {
33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status != U_ZERO_ERROR) {
33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==1
33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // 'v' test flag means that the test pattern should not compile if ICU was configured
33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //     to not include break iteration.  RBBI is needed for Unicode word boundaries.
33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {
33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto cleanupAndReturn;
33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (flags.indexOf((UChar)0x45) >= 0) {  //  flags contain 'E'
33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Expected pattern compilation error.
33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (flags.indexOf((UChar)0x64) >= 0) {   // flags contain 'd'
33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status));
33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto cleanupAndReturn;
33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Unexpected pattern compilation error.
33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: error %s compiling pattern. (UTF8)", line, u_errorName(status));
33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto cleanupAndReturn;
33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTF8Pattern == NULL) {
33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x64) >= 0) {  // 'd' flag
34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPatternDump(callerPattern);
34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x45) >= 0) {  // 'E' flag
34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("%s, Line %d: Expected, but did not get, a pattern compilation error.", srcPath, line);
34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanupAndReturn;
34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Number of times find() should be called on the test string, default to 1
34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    numFinds = 1;
34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=2; i<=9; i++) {
34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flags.indexOf((UChar)(0x30 + i)) >= 0) {   // digit flag
34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (numFinds != 1) {
34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: more than one digit flag.  Scanning %d.", line, i);
34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto cleanupAndReturn;
34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            numFinds = i;
34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // 'M' flag.  Use matches() instead of find()
34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x4d) >= 0) {
34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        useMatchesFunc = TRUE;
34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x4c) >= 0) {
34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        useLookingAtFunc = TRUE;
34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Find the tags in the input data, remove them, and record the group boundary
34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    positions.
34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    parsePat = RegexPattern::compile("<(/?)(r|[0-9]+)>", 0, pe, status);
34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS_L(line);
34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unEscapedInput = inputString.unescape();
34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    parseMatcher = parsePat->matcher(unEscapedInput, status);
34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS_L(line);
34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(parseMatcher->find()) {
34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        parseMatcher->appendReplacement(deTaggedInput, "", status);
34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString groupNum = parseMatcher->group(2, status);
34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (groupNum == "r") {
34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // <r> or </r>, a region specification within the string
34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (parseMatcher->group(1, status) == "/") {
34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                regionEnd = deTaggedInput.length();
34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                regionStart = deTaggedInput.length();
34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // <digits> or </digits>, a group match boundary tag.
34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (parseMatcher->group(1, status) == "/") {
34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                set(groupEnds, deTaggedInput.length(), groupNum);
34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                set(groupStarts, deTaggedInput.length(), groupNum);
34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    parseMatcher->appendTail(deTaggedInput);
34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      errln("mismatched <r> tags");
34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      failed = TRUE;
34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      goto cleanupAndReturn;
34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Configure the matcher according to the flags specified with this test.
34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher = callerPattern->matcher(deTaggedInput, status);
34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS_L(line);
34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->setTrace(TRUE);
34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTF8Pattern != NULL) {
34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR; // buffer overflow
34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        inputChars = new char[inputUTF8Length+1];
34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status);
34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);
34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status == U_ZERO_ERROR) {
34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_CHECK_STATUS_L(line);
34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTF8Matcher == NULL) {
34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Generate native indices for UTF8 versions of region and capture group info
35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTF8Matcher != NULL) {
35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Fill out the native index UVector info.
35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (i=0; i<groupStarts.size(); i++) {
35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  start = groupStarts.elementAti(i);
35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (start >= 0) {
35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  startUTF8;
35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!utextOffsetToNative(&inputText, start, startUTF8)) {
35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    errln("Error at line %d: could not find native index for group start %d.  UTF16 index %d", line, i, start);
35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    failed = TRUE;
35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                setInt(groupStartsUTF8, startUTF8, i);
35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  end = groupEnds.elementAti(i);
35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (end >= 0) {
35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  endUTF8;
35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!utextOffsetToNative(&inputText, end, endUTF8)) {
35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    errln("Error at line %d: could not find native index for group end %d.  UTF16 index %d", line, i, end);
35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    failed = TRUE;
35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                setInt(groupEndsUTF8, endUTF8, i);
35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (regionStart>=0) {
35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       matcher->region(regionStart, regionEnd, status);
35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       REGEX_CHECK_STATUS_L(line);
35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       if (UTF8Matcher != NULL) {
35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           UTF8Matcher->region(regionStartUTF8, regionEndUTF8, status);
35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           REGEX_CHECK_STATUS_L(line);
35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       }
35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x61) >= 0) {   //  'a' anchoring bounds flag
35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->useAnchoringBounds(FALSE);
35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTF8Matcher != NULL) {
35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTF8Matcher->useAnchoringBounds(FALSE);
35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x62) >= 0) {   //  'b' transparent bounds flag
35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->useTransparentBounds(TRUE);
35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTF8Matcher != NULL) {
35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTF8Matcher->useTransparentBounds(TRUE);
35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Do a find on the de-tagged input using the caller's pattern
35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     TODO: error on count>1 and not find().
35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //           error on both matches() and lookingAt().
35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; i<numFinds; i++) {
35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (useMatchesFunc) {
35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = matcher->matches(status);
35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTF8Matcher != NULL) {
35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               isUTF8Match = UTF8Matcher->matches(status);
35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else  if (useLookingAtFunc) {
35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = matcher->lookingAt(status);
35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTF8Matcher != NULL) {
35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isUTF8Match = UTF8Matcher->lookingAt(status);
35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = matcher->find();
35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTF8Matcher != NULL) {
35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isUTF8Match = UTF8Matcher->find();
35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matcher->setTrace(FALSE);
35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d. ICU ErrorCode is %s", u_errorName(status));
35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Match up the groups from the find() with the groups from the tags
35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // number of tags should match number of groups from find operation.
35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // matcher->groupCount does not include group 0, the entire match, hence the +1.
35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   G option in test means that capture group data is not available in the
35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     expected results, so the check needs to be suppressed.
35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isMatch == FALSE && groupStarts.size() != 0) {
35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Error at line %d:  Match expected, but none found.", line);
35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanupAndReturn;
36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (UTF8Matcher != NULL && isUTF8Match == FALSE && groupStarts.size() != 0) {
36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d:  Match expected, but none found. (UTF8)", line);
36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanupAndReturn;
36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (flags.indexOf((UChar)0x47 /*G*/) >= 0) {
36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Only check for match / no match.  Don't check capture groups.
36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (isMatch && groupStarts.size() == 0) {
36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d:  No match expected, but one found.", line);
36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) {
36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d:  No match expected, but one found. (UTF8)", line);
36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanupAndReturn;
36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS_L(line);
36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; i<=matcher->groupCount(); i++) {
36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t  expectedStart = (i >= groupStarts.size()? -1 : groupStarts.elementAti(i));
36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t  expectedStartUTF8 = (i >= groupStartsUTF8.size()? -1 : groupStartsUTF8.elementAti(i));
36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (matcher->start(i, status) != expectedStart) {
36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d",
36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                line, i, expectedStart, matcher->start(i, status));
36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {
36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d: incorrect start position for group %d.  Expected %d, got %d (UTF8)",
36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  line, i, expectedStartUTF8, UTF8Matcher->start(i, status));
36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t  expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t  expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (matcher->end(i, status) != expectedEnd) {
36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d",
36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                line, i, expectedEnd, matcher->end(i, status));
36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Error on end position;  keep going; real error is probably yet to come as group
36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   end positions work from end of the input data towards the front.
36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {
36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Error at line %d: incorrect end position for group %d.  Expected %d, got %d (UTF8)",
36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  line, i, expectedEndUTF8, UTF8Matcher->end(i, status));
36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            failed = TRUE;
36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Error on end position;  keep going; real error is probably yet to come as group
36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   end positions work from end of the input data towards the front.
36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ( matcher->groupCount()+1 < groupStarts.size()) {
36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: Expected %d capture groups, found %d.",
36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            line, groupStarts.size()-1, matcher->groupCount());
36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else if (UTF8Matcher != NULL && UTF8Matcher->groupCount()+1 < groupStarts.size()) {
36566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: Expected %d capture groups, found %d. (UTF8)",
36576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              line, groupStarts.size()-1, UTF8Matcher->groupCount());
36586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
36626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->requireEnd() == TRUE) {
36636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE", line);
36646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
36666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTF8Matcher->requireEnd() == TRUE) {
36676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE (UTF8)", line);
36686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
36726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->requireEnd() == FALSE) {
36736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
36746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) &&   //  'Y' flag:  RequireEnd() == false
36766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTF8Matcher->requireEnd() == FALSE) {
36776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE (UTF8)", line);
36786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
36826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->hitEnd() == TRUE) {
36836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
36846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
36866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UTF8Matcher->hitEnd() == TRUE) {
36876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE (UTF8)", line);
36886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
36926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher->hitEnd() == FALSE) {
36936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
36946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
36966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UTF8Matcher->hitEnd() == FALSE) {
36976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE (UTF8)", line);
36986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        failed = TRUE;
36996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanupAndReturn:
37036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (failed) {
37046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        infoln((UnicodeString)"\""+pattern+(UnicodeString)"\"  "
37056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            +flags+(UnicodeString)"  \""+inputString+(UnicodeString)"\"");
37066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // callerPattern->dump();
37076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete parseMatcher;
37096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete parsePat;
37106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete UTF8Matcher;
37116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete UTF8Pattern;
37126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete matcher;
37136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete callerPattern;
37146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&inputText);
37166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete[] inputChars;
37176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&patternText);
37186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete[] patternChars;
37196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_close(UTF8Converter);
37206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
37216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
37266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//      Errors     Check for error handling in patterns.
37286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
37306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Errors() {
37316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // \escape sequences that aren't implemented yet.
37326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
37336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Missing close parentheses
37356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN);
37366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("Capturing Parenthesis(...", 1, 25, U_REGEX_MISMATCHED_PAREN);
37376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("Grouping only parens (?: blah blah", 1, 34, U_REGEX_MISMATCHED_PAREN);
37386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Extra close paren
37406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("Grouping only parens (?: blah)) blah", 1, 31, U_REGEX_MISMATCHED_PAREN);
37416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN);
37426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN);
37436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Look-ahead, Look-behind
37456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  TODO:  add tests for unbounded length look-behinds.
37466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc(?<@xyz).*", 1, 7, U_REGEX_RULE_SYNTAX);       // illegal construct
37476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Attempt to use non-default flags
37496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
37506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UParseError   pe;
37516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode    status = U_ZERO_ERROR;
37526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t       flags  = UREGEX_CANON_EQ |
37536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                               UREGEX_COMMENTS         | UREGEX_DOTALL   |
37546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                               UREGEX_MULTILINE;
37556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *pat1= RegexPattern::compile(".*", flags, pe, status);
37566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_REGEX_UNIMPLEMENTED);
37576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pat1;
37586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Quantifiers are allowed only after something that can be quantified.
37626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("+", 1, 1, U_REGEX_RULE_SYNTAX);
37636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);
37646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);
37656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Mal-formed {min,max} quantifiers
37676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL);
37686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN);
37696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL);
37706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);
37716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);
37726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);
37736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG);        // Overflows int during scan
37746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG);          // Overflows regex binary format
37756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);
37766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Ticket 5389
37786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);
37796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Invalid Back Reference \0
37816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    For ICU 3.8 and earlier
37826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    For ICU versions newer than 3.8, \0 introduces an octal escape.
37836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
37846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);
37856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
37876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------
37906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Read a text data file, convert it to UChars, and return the data
37926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    in one big UChar * buffer, which the caller must delete.
37936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
37956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
37966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                     const char *defEncoding, UErrorCode &status) {
37976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar       *retPtr  = NULL;
37986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char        *fileBuf = NULL;
37996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter* conv     = NULL;
38006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    FILE        *f       = NULL;
38016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ulen = 0;
38036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
38046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return retPtr;
38056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Open the file.
38096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    f = fopen(fileName, "rb");
38116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (f == 0) {
38126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Error opening test data file %s\n", fileName);
38136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_FILE_ACCESS_ERROR;
38146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
38156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Read it in
38186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t            fileSize;
38206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t            amt_read;
38216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fseek( f, 0, SEEK_END);
38236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fileSize = ftell(f);
38246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fileBuf = new char[fileSize];
38256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fseek(f, 0, SEEK_SET);
38266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    amt_read = fread(fileBuf, 1, fileSize, f);
38276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (amt_read != fileSize || fileSize <= 0) {
38286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error reading test data file.");
38296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanUpAndReturn;
38306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Look for a Unicode Signature (BOM) on the data just read
38346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t        signatureLength;
38366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *   fileBufC;
38376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char*    encoding;
38386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fileBufC = fileBuf;
38406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    encoding = ucnv_detectUnicodeSignature(
38416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fileBuf, fileSize, &signatureLength, &status);
38426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(encoding!=NULL ){
38436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fileBufC  += signatureLength;
38446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fileSize  -= signatureLength;
38456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
38466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        encoding = defEncoding;
38476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (strcmp(encoding, "utf-8") == 0) {
38486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("file %s is missing its BOM", fileName);
38496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
38506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Open a converter to take the rule file to UTF-16
38546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    conv = ucnv_open(encoding, &status);
38566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
38576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanUpAndReturn;
38586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Convert the rules to UChar.
38626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Preflight first to determine required buffer size.
38636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
38646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ulen = ucnv_toUChars(conv,
38656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        NULL,           //  dest,
38666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        0,              //  destCapacity,
38676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fileBufC,
38686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fileSize,
38696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        &status);
38706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (status == U_BUFFER_OVERFLOW_ERROR) {
38716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Buffer Overflow is expected from the preflight operation.
38726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
38736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retPtr = new UChar[ulen+1];
38756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ucnv_toUChars(conv,
38766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            retPtr,       //  dest,
38776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ulen+1,
38786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fileBufC,
38796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fileSize,
38806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            &status);
38816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanUpAndReturn:
38846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fclose(f);
38856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete[] fileBuf;
38866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_close(conv);
38876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
38886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
38896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete []retPtr;
38906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retPtr = 0;
38916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ulen   = 0;
38926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
38936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return retPtr;
38946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
38956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------
38986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
38996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   PerlTests  - Run Perl's regular expression tests
39006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                The input file for this test is re_tests, the standard regular
39016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                expression test data distributed with the Perl source code.
39026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                Here is Perl's description of the test data file:
39046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # The tests are in a separate file 't/op/re_tests'.
39066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Each line in that file is a separate test.
39076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # There are five columns, separated by tabs.
39086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 1 contains the pattern, optionally enclosed in C<''>.
39106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Modifiers can be put after the closing C<'>.
39116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 2 contains the string to be matched.
39136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 3 contains the expected result:
39156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #     y   expect a match
39166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #     n   expect no match
39176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #     c   expect an error
39186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # B   test exposes a known bug in Perl, should be skipped
39196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # b   test exposes a known bug in Perl, should be skipped if noamp
39206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 4 contains a string, usually C<$&>.
39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 5 contains the expected result of double-quote
39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # interpolating that string after the match, or start of error message.
39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # Column 6, if present, contains a reason why the test is skipped.
39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # This is printed with "skipped", for harness to pick up.
39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # \n in the tests are interpolated, as are variables of the form ${\w+}.
39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        #
39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # If you want to add a regular expression test that can't be expressed
39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        # in this format, don't add it here: put it in op/pat.t instead.
39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        For ICU, if field 3 contains an 'i', the test will be skipped.
39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        The test exposes some known incompatibility between ICU and Perl regexps.
39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//        (The i is in addition to whatever was there before.)
39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------
39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PerlTests() {
39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char tdd[2048];
39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *srcPath;
39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode  status = U_ZERO_ERROR;
39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError pe;
39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Open and read the test data file.
39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    srcPath=getPath(tdd, "re_tests.txt");
39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(srcPath==NULL) {
39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    len;
39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Put the test data into a UnicodeString
39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString testDataString(FALSE, testData, len);
39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to break the input file into lines, and strip the new lines.
39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     One line per match, capture group one is the desired data.
39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("RegexPattern::compile() error");
39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to split a test file line into fields.
39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    There are six fields, separated by tabs.
39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to identify test patterns with flag settings, and to separate them.
39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Test patterns with flags look like 'pattern'i
39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Test patterns without flags are not quoted:   pattern
39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* flagMat = flagPat->matcher(status);
39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The Perl tests reference several perl-isms, which are evaluated/substituted
39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   in the test data.  Not being perl, this must be done explicitly.  Here
39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   are string constants and REs for these constructs.
39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString nulnulSrc("${nulnul}");
39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nulnul = nulnul.unescape();
40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString ffffSrc("${ffff}");
40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString ffff("\\uffff", -1, US_INV);
40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ffff = ffff.unescape();
40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  regexp for $-[0], $+[2], etc.
40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *groupsMat = groupsPat->matcher(status);
40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  regexp for $0, $1, $2, etc.
40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *cgMat = cgPat->matcher(status);
40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Main Loop for the Perl Tests, runs once per line from the
40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   test data file.
40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  lineNum = 0;
40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  skippedUnimplementedCount = 0;
40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (lineMat->find()) {
40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lineNum++;
40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Get a line, break it into its fields, do the Perl
40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    variable substitutions.
40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString line = lineMat->group(1, status);
40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString fields[7];
40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fieldPat->split(line, fields, 7, status);
40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagMat->reset(fields[0]);
40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagMat->matches(status);
40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString pattern  = flagMat->group(2, status);
40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace("${bang}", "!");
40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace(ffffSrc, ffff);
40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Identify patterns that include match flag settings,
40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    split off the flags, remove the extra quotes.
40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString flagStr = flagMat->group(3, status);
40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t flags = 0;
40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_m = 0x6d;
40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_x = 0x78;
40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_y = 0x79;
40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_i) != -1) {
40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_CASE_INSENSITIVE;
40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_m) != -1) {
40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_MULTILINE;
40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_x) != -1) {
40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_COMMENTS;
40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compile the test pattern.
40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
40676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *testPat = RegexPattern::compile(pattern, flags, pe, status);
40686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status == U_REGEX_UNIMPLEMENTED) {
40696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
40706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Test of a feature that is planned for ICU, but not yet implemented.
40716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   skip the test.
40726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            skippedUnimplementedCount++;
40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Some tests are supposed to generate errors.
40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Only report an error for tests that are supposed to succeed.
40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_i) >= 0) {
40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // ICU should skip this test.
40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_c) >= 0) {
40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This pattern should have caused a compilation error, but didn't/
40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("line %d: Expected a pattern compile error, got success.", lineNum);
41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // replace the Perl variables that appear in some of the
41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   match data strings.
41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString matchString = fields[1];
41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(nulnulSrc, nulnul);
41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(ffffSrc,   ffff);
41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Replace any \n in the match string with an actual new-line char.
41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Don't do full unescape, as this unescapes more than Perl does, which
41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  causes other spurious failures in the tests.
41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Run the test, check for expected match/don't match result.
41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *testMat = testPat->matcher(matchString, status);
41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool found = testMat->find();
41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool expected = FALSE;
41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_y) >=0) {
41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            expected = TRUE;
41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (expected != found) {
41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("line %d: Expected %smatch, got %smatch",
41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                lineNum, expected?"":"no ", found?"":"no " );
41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Don't try to check expected results if there is no match.
41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (Some have stuff in the expected fields)
41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!found) {
41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testMat;
41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Interpret the Perl expression from the fourth field of the data file,
41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // building up an ICU string from the results of the ICU match.
41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The Perl expression will contain references to the results of
41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     a regex match, including the matched string, capture group strings,
41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     group starting and ending indicies, etc.
41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString resultString;
41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString perlExpr = fields[3];
41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if SUPPORT_MUTATING_INPUT_STRING
41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        groupsMat->reset(perlExpr);
41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cgMat->reset(perlExpr);
41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (perlExpr.length() > 0) {
41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !SUPPORT_MUTATING_INPUT_STRING
41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Perferred usage.  Reset after any modification to input string.
41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            groupsMat->reset(perlExpr);
41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            cgMat->reset(perlExpr);
41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (perlExpr.startsWith("$&")) {
41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(testMat->group(status));
41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (groupsMat->lookingAt(status)) {
41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // $-[0]   $+[2]  etc.
41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString digitString = groupsMat->group(2, status);
41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t t = 0;
41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString plusOrMinus = groupsMat->group(1, status);
41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t matchPosition;
41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (plusOrMinus.compare("+") == 0) {
41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    matchPosition = testMat->end(groupNum, status);
41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    matchPosition = testMat->start(groupNum, status);
41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (matchPosition != -1) {
41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, matchPosition);
41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, groupsMat->end(status));
41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (cgMat->lookingAt(status)) {
41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // $1, $2, $3, etc.
41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString digitString = cgMat->group(1, status);
41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t t = 0;
41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_SUCCESS(status)) {
41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    resultString.append(testMat->group(groupNum, status));
41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    status = U_ZERO_ERROR;
41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, cgMat->end(status));
41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith("@-")) {
41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<=testMat->groupCount(); i++) {
42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (i>0) {
42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        resultString.append(" ");
42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith("@+")) {
42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<=testMat->groupCount(); i++) {
42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (i>0) {
42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        resultString.append(" ");
42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                     //           or as an escaped sequence (e.g. \n)
42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (perlExpr.length() > 1) {
42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c = perlExpr.charAt(0);
42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                switch (c) {
42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                case 'n':   c = '\n'; break;
42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // add any other escape sequences that show up in the test expected results.
42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(c);
42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 1);
42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else  {
42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Any characters from the perl expression that we don't explicitly
42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  recognize before here are assumed to be literals and copied
42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  as-is to the expected results.
42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(perlExpr.charAt(0));
42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 1);
42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(status)) {
42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Expected Results Compare
42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString expectedS(fields[4]);
42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(nulnulSrc, nulnul);
42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(ffffSrc,   ffff);
42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (expectedS.compare(resultString) != 0) {
42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            err("Line %d: Incorrect perl expression results.", lineNum);
42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete testMat;
42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete testPat;
42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // All done.  Clean up allocated stuff.
42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete cgMat;
42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete cgPat;
42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete groupsMat;
42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete groupsPat;
42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete flagMat;
42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete flagPat;
42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete lineMat;
42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete linePat;
42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fieldPat;
42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] testData;
42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------
42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   PerlTestsUTF8  Run Perl's regular expression tests on UTF-8-based UTexts
42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  (instead of using UnicodeStrings) to test the alternate engine.
42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  The input file for this test is re_tests, the standard regular
42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  expression test data distributed with the Perl source code.
42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  See PerlTests() for more information.
42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------
42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PerlTestsUTF8() {
43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char tdd[2048];
43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char *srcPath;
43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode  status = U_ZERO_ERROR;
43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError pe;
43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    LocalUConverterPointer UTF8Converter(ucnv_open("UTF-8", &status));
43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText       patternText = UTEXT_INITIALIZER;
43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char       *patternChars = NULL;
43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     patternLength;
43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     patternCapacity = 0;
43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText       inputText = UTEXT_INITIALIZER;
43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char       *inputChars = NULL;
43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     inputLength;
43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     inputCapacity = 0;
43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Open and read the test data file.
43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    srcPath=getPath(tdd, "re_tests.txt");
43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(srcPath==NULL) {
43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    len;
43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *testData = ReadAndConvertFile(srcPath, len, "iso-8859-1", status);
43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return; /* something went wrong, error already output */
43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Put the test data into a UnicodeString
43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString testDataString(FALSE, testData, len);
43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to break the input file into lines, and strip the new lines.
43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     One line per match, capture group one is the desired data.
43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
43416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("RegexPattern::compile() error");
43426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
43436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* lineMat = linePat->matcher(testDataString, status);
43456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to split a test file line into fields.
43486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    There are six fields, separated by tabs.
43496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
43516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Regex to identify test patterns with flag settings, and to separate them.
43546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Test patterns with flags look like 'pattern'i
43556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Test patterns without flags are not quoted:   pattern
43566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Coming out, capture group 2 is the pattern, capture group 3 is the flags.
43576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
43596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* flagMat = flagPat->matcher(status);
43606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The Perl tests reference several perl-isms, which are evaluated/substituted
43636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   in the test data.  Not being perl, this must be done explicitly.  Here
43646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   are string constants and REs for these constructs.
43656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString nulnulSrc("${nulnul}");
43676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
43686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nulnul = nulnul.unescape();
43696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString ffffSrc("${ffff}");
43716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString ffff("\\uffff", -1, US_INV);
43726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ffff = ffff.unescape();
43736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  regexp for $-[0], $+[2], etc.
43756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
43766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *groupsMat = groupsPat->matcher(status);
43776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  regexp for $0, $1, $2, etc.
43796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
43806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *cgMat = cgPat->matcher(status);
43816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Main Loop for the Perl Tests, runs once per line from the
43856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   test data file.
43866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  lineNum = 0;
43886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  skippedUnimplementedCount = 0;
43896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (lineMat->find()) {
43906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lineNum++;
43916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
43936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Get a line, break it into its fields, do the Perl
43946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    variable substitutions.
43956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
43966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString line = lineMat->group(1, status);
43976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString fields[7];
43986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fieldPat->split(line, fields, 7, status);
43996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagMat->reset(fields[0]);
44016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flagMat->matches(status);
44026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString pattern  = flagMat->group(2, status);
44036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace("${bang}", "!");
44046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
44056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pattern.findAndReplace(ffffSrc, ffff);
44066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Identify patterns that include match flag settings,
44096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    split off the flags, remove the extra quotes.
44106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString flagStr = flagMat->group(3, status);
44126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
44136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
44146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
44156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t flags = 0;
44176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_c = 0x63;  // Char constants for the flag letters.
44186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_i = 0x69;  //   (Damn the lack of Unicode support in C)
44196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_m = 0x6d;
44206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_x = 0x78;
44216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar UChar_y = 0x79;
44226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_i) != -1) {
44236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_CASE_INSENSITIVE;
44246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_m) != -1) {
44266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_MULTILINE;
44276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (flagStr.indexOf(UChar_x) != -1) {
44296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            flags |= UREGEX_COMMENTS;
44306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Put the pattern in a UTF-8 UText
44346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
44366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
44376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status == U_BUFFER_OVERFLOW_ERROR) {
44386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
44396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete[] patternChars;
44406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            patternCapacity = patternLength + 1;
44416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            patternChars = new char[patternCapacity];
44426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);
44436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&patternText, patternChars, patternLength, &status);
44456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compile the test pattern.
44486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexPattern *testPat = RegexPattern::compile(&patternText, flags, pe, status);
44506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status == U_REGEX_UNIMPLEMENTED) {
44516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
44526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Test of a feature that is planned for ICU, but not yet implemented.
44536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   skip the test.
44546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            skippedUnimplementedCount++;
44556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
44566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
44576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
44586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
44616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Some tests are supposed to generate errors.
44626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Only report an error for tests that are supposed to succeed.
44636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fields[2].indexOf(UChar_c) == -1  &&  // Compilation is not supposed to fail AND
44646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fields[2].indexOf(UChar_i) == -1)     //   it's not an accepted ICU incompatibility
44656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
44666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("line %d: ICU Error \"%s\"\n", lineNum, u_errorName(status));
44676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
44696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
44706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
44716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_i) >= 0) {
44746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // ICU should skip this test.
44756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
44766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
44776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_c) >= 0) {
44806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This pattern should have caused a compilation error, but didn't.
44816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("line %d: Expected a pattern compile error, got success.", lineNum);
44826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
44836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
44846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
44856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // replace the Perl variables that appear in some of the
44896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   match data strings.
44906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
44916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString matchString = fields[1];
44926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(nulnulSrc, nulnul);
44936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(ffffSrc,   ffff);
44946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Replace any \n in the match string with an actual new-line char.
44966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Don't do full unescape, as this unescapes more than Perl does, which
44976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  causes other spurious failures in the tests.
44986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
44996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Put the input in a UTF-8 UText
45026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
45046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        inputLength = matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
45056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (status == U_BUFFER_OVERFLOW_ERROR) {
45066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
45076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete[] inputChars;
45086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            inputCapacity = inputLength + 1;
45096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            inputChars = new char[inputCapacity];
45106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            matchString.extract(inputChars, inputCapacity, UTF8Converter.getAlias(), status);
45116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
45126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUTF8(&inputText, inputChars, inputLength, &status);
45136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Run the test, check for expected match/don't match result.
45166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher *testMat = &testPat->matcher(status)->reset(&inputText);
45186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool found = testMat->find();
45196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool expected = FALSE;
45206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fields[2].indexOf(UChar_y) >=0) {
45216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            expected = TRUE;
45226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
45236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (expected != found) {
45246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("line %d: Expected %smatch, got %smatch",
45256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                lineNum, expected?"":"no ", found?"":"no " );
45266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
45276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
45286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Don't try to check expected results if there is no match.
45306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (Some have stuff in the expected fields)
45316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!found) {
45326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testMat;
45336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete testPat;
45346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
45356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
45366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Interpret the Perl expression from the fourth field of the data file,
45396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // building up an ICU string from the results of the ICU match.
45406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The Perl expression will contain references to the results of
45416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     a regex match, including the matched string, capture group strings,
45426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     group starting and ending indices, etc.
45436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
45446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString resultString;
45456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString perlExpr = fields[3];
45466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (perlExpr.length() > 0) {
45486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            groupsMat->reset(perlExpr);
45496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            cgMat->reset(perlExpr);
45506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (perlExpr.startsWith("$&")) {
45526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(testMat->group(status));
45536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
45546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (groupsMat->lookingAt(status)) {
45576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // $-[0]   $+[2]  etc.
45586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString digitString = groupsMat->group(2, status);
45596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t t = 0;
45606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
45616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString plusOrMinus = groupsMat->group(1, status);
45626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t matchPosition;
45636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (plusOrMinus.compare("+") == 0) {
45646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    matchPosition = testMat->end(groupNum, status);
45656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
45666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    matchPosition = testMat->start(groupNum, status);
45676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (matchPosition != -1) {
45696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, matchPosition);
45706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, groupsMat->end(status));
45726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (cgMat->lookingAt(status)) {
45756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // $1, $2, $3, etc.
45766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString digitString = cgMat->group(1, status);
45776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t t = 0;
45786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t groupNum = ICU_Utility::parseNumber(digitString, t, 10);
45796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_SUCCESS(status)) {
45806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    resultString.append(testMat->group(groupNum, status));
45816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    status = U_ZERO_ERROR;
45826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, cgMat->end(status));
45846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith("@-")) {
45876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
45886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<=testMat->groupCount(); i++) {
45896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (i>0) {
45906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        resultString.append(" ");
45916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
45926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, testMat->start(i, status));
45936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
45956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith("@+")) {
45986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
45996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<=testMat->groupCount(); i++) {
46006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (i>0) {
46016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        resultString.append(" ");
46026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
46036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ICU_Utility::appendNumber(resultString, testMat->end(i, status));
46046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 2);
46066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) {    // \Escape.  Take following char as a literal.
46096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                     //           or as an escaped sequence (e.g. \n)
46106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (perlExpr.length() > 1) {
46116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    perlExpr.remove(0, 1);  // Remove the '\', but only if not last char.
46126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c = perlExpr.charAt(0);
46146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                switch (c) {
46156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                case 'n':   c = '\n'; break;
46166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // add any other escape sequences that show up in the test expected results.
46176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(c);
46196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 1);
46206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else  {
46236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Any characters from the perl expression that we don't explicitly
46246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  recognize before here are assumed to be literals and copied
46256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  as-is to the expected results.
46266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                resultString.append(perlExpr.charAt(0));
46276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                perlExpr.remove(0, 1);
46286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(status)) {
46316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln("Line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
46326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
46336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
46356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
46376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Expected Results Compare
46386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
46396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString expectedS(fields[4]);
46406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(nulnulSrc, nulnul);
46416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(ffffSrc,   ffff);
46426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
46436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (expectedS.compare(resultString) != 0) {
46466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            err("Line %d: Incorrect perl expression results.", lineNum);
46476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            infoln((UnicodeString)"Expected \""+expectedS+(UnicodeString)"\"; got \""+resultString+(UnicodeString)"\"");
46486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
46496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete testMat;
46516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete testPat;
46526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
46536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
46556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // All done.  Clean up allocated stuff.
46566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
46576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete cgMat;
46586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete cgPat;
46596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete groupsMat;
46616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete groupsPat;
46626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete flagMat;
46646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete flagPat;
46656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete lineMat;
46676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete linePat;
46686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fieldPat;
46706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] testData;
46716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&patternText);
46736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&inputText);
46746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] patternChars;
46766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] inputChars;
46776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);
46806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
46826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------
46856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
46866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Bug6149   Verify limits to heap expansion for backtrack stack.
46876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             Use this pattern,
46886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                 "(a?){1,8000000}"
//             Note: the upper bound was originally unbounded, but that form now has loop-breaking enabled.
46906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   This test is likely to be fragile, as further optimizations stop
46916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   more cases of pointless looping in the match engine.
46926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
46936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------
46946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug6149() {
46956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString pattern("(a?){1,8000000}");
46966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString s("xyz");
46976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t flags = 0;
46986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
46996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher  matcher(pattern, s, flags, status);
47016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool result = false;
47026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT_FAIL(result=matcher.matches(status), U_REGEX_STACK_OVERFLOW);
47036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(result == FALSE);
47046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }
47056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
47086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   Callbacks()    Test the callback function.
47096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  When set, callbacks occur periodically during matching operations,
47106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  giving the application code the ability to abort the operation
//                  before its normal completion.
47126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
47136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct callBackContext {
47156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexTest        *test;
47166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t          maxCalls;
47176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t          numCalls;
47186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t          lastSteps;
47196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;};
47206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
47216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN
47236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV
47246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtestCallBackFn(const void *context, int32_t steps) {
47256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callBackContext  *info = (callBackContext *)context;
47266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (info->lastSteps+1 != steps) {
47276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        info->test->errln("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
47286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
47296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    info->lastSteps = steps;
47306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    info->numCalls++;
47316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (info->numCalls < info->maxCalls);
47326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
47336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END
47346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Callbacks() {
47366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   {
47376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Getter returns NULLs if no callback has been set
47386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The variables that the getter will fill in.
47406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Init to non-null values so that the action of the getter can be seen.
47416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const void          *returnedContext = &returnedContext;
47426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        URegexMatchCallback *returnedFn = &testCallBackFn;
47436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
47456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("x", 0, status);
47466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.getMatchCallback(returnedFn, returnedContext, status);
47486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedFn == NULL);
47506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedContext == NULL);
47516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
47526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   {
47546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Set and Get work
47556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        callBackContext cbInfo = {this, 0, 0, 0};
47566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const void          *returnedContext;
47576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        URegexMatchCallback *returnedFn;
47586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
47596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
47606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
47626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.getMatchCallback(returnedFn, returnedContext, status);
47646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedFn == testCallBackFn);
47666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedContext == &cbInfo);
47676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A short-running match shouldn't invoke the callback
47696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
47706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(1);
47716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s = "xxx";
47726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s);
47736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.matches(status));
47746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls == 0);
47766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A medium-length match that runs long enough to invoke the
47786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   callback, but not so long that the callback aborts it.
47796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
47806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(4);
47816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = "aaaaaaaaaaaaaaaaaaab";
47826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s);
47836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.matches(status)==FALSE);
47846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
47856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls > 0);
47866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A longer running match that the callback function will abort.
47886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
47896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(4);
47906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = "aaaaaaaaaaaaaaaaaaaaaaab";
47916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s);
47926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.matches(status)==FALSE);
47936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
47946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls == 4);
47956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
47966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
47996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
48026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   FindProgressCallbacks()    Test the find "progress" callback function.
//                  When set, the find progress callback will be invoked during find operations
//                  after each return from a match attempt, giving the application the opportunity
//                  to terminate a long-running find operation before its normal completion.
48066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
48076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct progressCallBackContext {
48096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexTest        *test;
48106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t          lastIndex;
48116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t          maxCalls;
48126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t          numCalls;
48136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
48146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
48156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN
48176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV
48186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtestProgressCallBackFn(const void *context, int64_t matchIndex) {
48196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    progressCallBackContext  *info = (progressCallBackContext *)context;
48206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    info->numCalls++;
48216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    info->lastIndex = matchIndex;
48226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);
48236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (info->numCalls < info->maxCalls);
48246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
48256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END
48266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::FindProgressCallbacks() {
48286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   {
48296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Getter returns NULLs if no callback has been set
48306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The variables that the getter will fill in.
48326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Init to non-null values so that the action of the getter can be seen.
48336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const void                  *returnedContext = &returnedContext;
48346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        URegexFindProgressCallback  *returnedFn = &testProgressCallBackFn;
48356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
48376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher("x", 0, status);
48386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
48406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedFn == NULL);
48426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedContext == NULL);
48436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
48446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   {
48466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Set and Get work
48476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        progressCallBackContext cbInfo = {this, 0, 0, 0};
48486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const void                  *returnedContext;
48496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        URegexFindProgressCallback  *returnedFn;
48506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
48516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
48526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
48546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.getFindProgressCallback(returnedFn, returnedContext, status);
48566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedFn == testProgressCallBackFn);
48586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(returnedContext == &cbInfo);
48596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A short-running match should NOT invoke the callback.
48616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
48626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(100);
48636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s = "abxxx";
48646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s);
48656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
48666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.setTrace(TRUE);
48676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
48686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.find(0, status));
48696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls == 0);
48716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A medium running match that causes matcher.find() to invoke our callback for each index.
48736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
48746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = "aaaaaaaaaaaaaaaaaaab";
48756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(s.length()); //  Some upper limit for number of calls that is greater than size of our input string
48766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s);
48776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.find(0, status)==FALSE);
48786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
48806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
48826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
48836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
48846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(s1.length() - 5); //  Bail early somewhere near the end of input string
48856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s1);
48866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.find(0, status)==FALSE);
48876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
48896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
48916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Now a match that will succeed, but after an interruption
48926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
48936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
48946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.reset(s2.length() - 10); //  Bail early somewhere near the end of input string
48956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        matcher.reset(s2);
48966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.find(0, status)==FALSE);
48976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
48986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Now retry the match from where left off
48996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cbInfo.maxCalls = 100; //  No callback limit
49006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
49016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
49036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
49046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
49076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
49106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
49116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    PreAllocatedUTextCAPI    Check the C API with pre-allocated mutable
49126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                             UTexts. The pure-C implementation of UText
49136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                             has no mutable backing stores, but we can
49146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                             use UnicodeString here to test the functionality.
49156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
49166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------------
49176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::PreAllocatedUTextCAPI () {
49186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode           status = U_ZERO_ERROR;
49196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    URegularExpression  *re;
49206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText                patternText = UTEXT_INITIALIZER;
49216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString        buffer;
49226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText                bufferText = UTEXT_INITIALIZER;
49236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&bufferText, &buffer, &status);
49256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
49276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  getText() and getUText()
49286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
49296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
49306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText  text1 = UTEXT_INITIALIZER;
49316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText  text2 = UTEXT_INITIALIZER;
49326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar  text2Chars[20];
49336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText  *resultText;
49346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
49366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);
49376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
49386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
49396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUChars(&text2, text2Chars, -1, &status);
49406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
49426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        re = uregex_openUText(&patternText, 0, NULL, &status);
49436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* First set a UText */
49456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setUText(re, &text1, &status);
49466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        resultText = uregex_getUText(re, &bufferText, &status);
49476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(resultText == &bufferText);
49496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(resultText, 0);
49506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(&text1, 0);
49516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(testUTextEqual(resultText, &text1));
49526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        resultText = uregex_getUText(re, &bufferText, &status);
49546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(resultText == &bufferText);
49566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(resultText, 0);
49576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(&text1, 0);
49586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(testUTextEqual(resultText, &text1));
49596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* Then set a UChar * */
49616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text2Chars, 7, &status);
49626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        resultText = uregex_getUText(re, &bufferText, &status);
49636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(resultText == &bufferText);
49656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(resultText, 0);
49666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(&text2, 0);
49676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(testUTextEqual(resultText, &text2));
49686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_close(re);
49706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&text1);
49716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&text2);
49726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
49736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
49756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  group()
49766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
49776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
49786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar    text1[80];
49796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText   *actual;
49806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool    result;
49816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
49826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
49846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
49856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text1, -1, &status);
49886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_find(re, 0, &status);
49896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result==TRUE);
49906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*  Capture Group 0, the full match.  Should succeed.  */
49926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
49936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
49946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
49956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(actual == &bufferText);
49966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
49976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*  Capture group #1.  Should succeed. */
49996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
50006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
50016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(actual == &bufferText);
50036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
50046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*  Capture group out of range.  Error. */
50066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
50076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
50086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
50096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(actual == &bufferText);
50106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_close(re);
50126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
50146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
50166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  replaceFirst()
50176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
50186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
50196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar    text1[80];
50206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar    text2[80];
50216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText    replText = UTEXT_INITIALIZER;
50226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText   *result;
50236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
50256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
50266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
50276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
50286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        re = uregex_openC("x(.*?)x", 0, NULL, &status);
50306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*  Normal case, with match */
50336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text1, -1, &status);
50346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
50356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
50366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &bufferText);
50386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);
50396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* No match.  Text should copy to output with no changes.  */
50416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text2, -1, &status);
50426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
50436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
50446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &bufferText);
50466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
50476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* Unicode escapes */
50496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text1, -1, &status);
50506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
50516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
50526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
50536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &bufferText);
50556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);
50566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_close(re);
50586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&replText);
50596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
50606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
50636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  replaceAll()
50646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
50656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
50666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar    text1[80];
50676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar    text2[80];
50686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText    replText = UTEXT_INITIALIZER;
50696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText   *result;
50706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
50726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
50736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
50746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
50756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        re = uregex_openC("x(.*?)x", 0, NULL, &status);
50776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*  Normal case, with match */
50806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text1, -1, &status);
50816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
50826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
50836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &bufferText);
50856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> <1> <...>.", result);
50866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* No match.  Text should copy to output with no changes.  */
50886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_setText(re, text2, -1, &status);
50896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
50906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result = uregex_replaceAllUText(re, &replText, &bufferText, &status);
50916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_CHECK_STATUS;
50926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(result == &bufferText);
50936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
50946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uregex_close(re);
50966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&replText);
50976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
50986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
51016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
51026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *   so we don't need to test it here.
51036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
51046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&bufferText);
51066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&patternText);
51076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
51086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------
51106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
51116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Bug7651   Regex pattern that exceeds default operator stack depth in matcher.
51126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
51136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------
51146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7651() {
51156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|\\$[A-Za-z]+)");
51166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData.
51176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation.
51186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|\\$[A-Za-z]+)");
51196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString s("#ff @abcd This is test");
51206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPattern  *REPattern = NULL;
51216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher  *REMatcher = NULL;
51226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
51236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError pe;
51246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REPattern = RegexPattern::compile(pattern1, 0, pe, status);
51266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REMatcher = REPattern->matcher(s, status);
51286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(REMatcher->find());
51306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(REMatcher->start(status) == 0);
51316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REPattern;
51326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REMatcher;
51336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
51346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REPattern = RegexPattern::compile(pattern2, 0, pe, status);
51366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REMatcher = REPattern->matcher(s, status);
51386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(REMatcher->find());
51406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(REMatcher->start(status) == 0);
51416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REPattern;
51426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete REMatcher;
51436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ZERO_ERROR;
51446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }
51456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7740() {
51476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
51486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString pattern = "(a)";
51496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString text = "abcdef";
51506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher *m = new RegexMatcher(pattern, text, 0, status);
51516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(m->lookingAt(status));
51536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    status = U_ILLEGAL_ARGUMENT_ERROR;
51556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString s = m->group(1, status);    // Bug 7740: segfault here.
51566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
51576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(s == "");
51586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete m;
51596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
51606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Bug 8479:  was crashing with a Bogus UnicodeString as input.
51626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug8479() {
51646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
51656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* const pMatcher = new RegexMatcher("\\Aboo\\z", UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, status);
51676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status))
51696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
51706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString str;
51716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        str.setToBogus();
51726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pMatcher->reset(str);
51736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ZERO_ERROR;
51746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pMatcher->matches(status);
51756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
51766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete pMatcher;
51776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
51786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
51796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug 7029
51826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug7029() {
51836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
51846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);
51866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString text = "abc.def";
51876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString splits[10];
51886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t numFields = pMatcher->split(text, splits, 10, status);
51906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
51916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_ASSERT(numFields == 8);
51926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete pMatcher;
51936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
51946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Bug 9283
//   This test is checking for the existence of any supplemental characters that case-fold
//   to a bmp character.
51986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
//   At the time of this writing there are none. If any should appear in a subsequent release
//   of Unicode, the code in regular expressions compilation that determines the longest
//   possible match for a literal string  will need to be enhanced.
52026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
52036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
52046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   for details on what to do in case of a failure of this test.
52056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
52066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::Bug9283() {
52076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
52086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);
52096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REGEX_CHECK_STATUS;
52106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t index;
52116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c;
52126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (index=0; ; index++) {
52136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c = supplementalsWithCaseFolding.charAt(index);
52146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == -1) {
52156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
52166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
52176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString cf = UnicodeString(c).foldCase();
52186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_ASSERT(cf.length() >= 2);
52196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
52206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
52216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexTest::CheckInvBufSize() {
52246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(inv_next>=INV_BUFSIZ) {
52256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n",
52266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          __FILE__, INV_BUFSIZ, inv_next);
52276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
52286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);
52296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
52306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
52316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
52336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5234