1/*
2******************************************************************************
3*
4*   Copyright (C) 2008-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  uspoof_conf.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009Jan05  (refactoring earlier files)
14*   created by: Andy Heninger
15*
*   Internal classes for compiling confusable data into its binary (runtime) form.
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/uspoof.h"
21#if !UCONFIG_NO_REGULAR_EXPRESSIONS
22#if !UCONFIG_NO_NORMALIZATION
23
24#include "unicode/unorm.h"
25#include "unicode/uregex.h"
26#include "unicode/ustring.h"
27#include "cmemory.h"
28#include "uspoof_impl.h"
29#include "uhash.h"
30#include "uvector.h"
31#include "uassert.h"
32#include "uarrsort.h"
33#include "uspoof_conf.h"
34
35U_NAMESPACE_USE
36
37
38//---------------------------------------------------------------------
39//
40//  buildConfusableData   Compile the source confusable data, as defined by
41//                        the Unicode data file confusables.txt, into the binary
42//                        structures used by the confusable detector.
43//
44//                        The binary structures are described in uspoof_impl.h
45//
46//     1.  parse the data, building 4 hash tables, one each for the SL, SA, ML and MA
47//         tables.  Each maps from a UChar32 to a String.
48//
49//     2.  Sort all of the strings encountered by length, since they will need to
50//         be stored in that order in the final string table.
51//
52//     3.  Build a list of keys (UChar32s) from the four mapping tables.  Sort the
53//         list because that will be the ordering of our runtime table.
54//
55//     4.  Generate the run time string table.  This is generated before the key & value
56//         tables because we need the string indexes when building those tables.
57//
58//     5.  Build the run-time key and value tables.  These are parallel tables, and are built
59//         at the same time
60//
61
62SPUString::SPUString(UnicodeString *s) {
63    fStr = s;
64    fStrTableIndex = 0;
65}
66
67
68SPUString::~SPUString() {
69    delete fStr;
70}
71
72
73SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) {
74    fVec = new UVector(status);
75    fHash = uhash_open(uhash_hashUnicodeString,           // key hash function
76                       uhash_compareUnicodeString,        // Key Comparator
77                       NULL,                              // Value Comparator
78                       &status);
79}
80
81
82SPUStringPool::~SPUStringPool() {
83    int i;
84    for (i=fVec->size()-1; i>=0; i--) {
85        SPUString *s = static_cast<SPUString *>(fVec->elementAt(i));
86        delete s;
87    }
88    delete fVec;
89    uhash_close(fHash);
90}
91
92
93int32_t SPUStringPool::size() {
94    return fVec->size();
95}
96
97SPUString *SPUStringPool::getByIndex(int32_t index) {
98    SPUString *retString = (SPUString *)fVec->elementAt(index);
99    return retString;
100}
101
102
103// Comparison function for ordering strings in the string pool.
104// Compare by length first, then, within a group of the same length,
105// by code point order.
106// Conforms to the type signature for a USortComparator in uvector.h
107
108static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) {
109	const SPUString *sL = const_cast<const SPUString *>(
110        static_cast<SPUString *>(left.pointer));
111 	const SPUString *sR = const_cast<const SPUString *>(
112 	    static_cast<SPUString *>(right.pointer));
113    int32_t lenL = sL->fStr->length();
114    int32_t lenR = sR->fStr->length();
115    if (lenL < lenR) {
116        return -1;
117    } else if (lenL > lenR) {
118        return 1;
119    } else {
120        return sL->fStr->compare(*(sR->fStr));
121    }
122}
123
124void SPUStringPool::sort(UErrorCode &status) {
125    fVec->sort(SPUStringCompare, status);
126}
127
128
129SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) {
130    SPUString *hashedString = static_cast<SPUString *>(uhash_get(fHash, src));
131    if (hashedString != NULL) {
132        delete src;
133    } else {
134        hashedString = new SPUString(src);
135        uhash_put(fHash, src, hashedString, &status);
136        fVec->addElement(hashedString, status);
137    }
138    return hashedString;
139}
140
141
142
143ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) :
144    fSpoofImpl(spImpl),
145    fInput(NULL),
146    fSLTable(NULL),
147    fSATable(NULL),
148    fMLTable(NULL),
149    fMATable(NULL),
150    fKeySet(NULL),
151    fKeyVec(NULL),
152    fValueVec(NULL),
153    fStringTable(NULL),
154    fStringLengthsTable(NULL),
155    stringPool(NULL),
156    fParseLine(NULL),
157    fParseHexNum(NULL),
158    fLineNum(0)
159{
160    if (U_FAILURE(status)) {
161        return;
162    }
163    fSLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
164    fSATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
165    fMLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
166    fMATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
167    fKeySet     = new UnicodeSet();
168    fKeyVec     = new UVector(status);
169    fValueVec   = new UVector(status);
170    stringPool = new SPUStringPool(status);
171}
172
173
174ConfusabledataBuilder::~ConfusabledataBuilder() {
175    uprv_free(fInput);
176    uregex_close(fParseLine);
177    uregex_close(fParseHexNum);
178    uhash_close(fSLTable);
179    uhash_close(fSATable);
180    uhash_close(fMLTable);
181    uhash_close(fMATable);
182    delete fKeySet;
183    delete fKeyVec;
184    delete fStringTable;
185    delete fStringLengthsTable;
186    delete fValueVec;
187    delete stringPool;
188}
189
190
191void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables,
192    int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) {
193
194    if (U_FAILURE(status)) {
195        return;
196    }
197    ConfusabledataBuilder builder(spImpl, status);
198    builder.build(confusables, confusablesLen, status);
199    if (U_FAILURE(status) && errorType != NULL) {
200        *errorType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
201        pe->line = builder.fLineNum;
202    }
203}
204
205
206void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
207               UErrorCode &status) {
208
209    // Convert the user input data from UTF-8 to UChar (UTF-16)
210    int32_t inputLen = 0;
211    if (U_FAILURE(status)) {
212        return;
213    }
214    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
215    if (status != U_BUFFER_OVERFLOW_ERROR) {
216        return;
217    }
218    status = U_ZERO_ERROR;
219    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
220    if (fInput == NULL) {
221        status = U_MEMORY_ALLOCATION_ERROR;
222        return;
223    }
224    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
225
226
227    // Regular Expression to parse a line from Confusables.txt.  The expression will match
228    // any line.  What was matched is determined by examining which capture groups have a match.
229    //   Capture Group 1:  the source char
230    //   Capture Group 2:  the replacement chars
231    //   Capture Group 3-6  the table type, SL, SA, ML, or MA
232    //   Capture Group 7:  A blank or comment only line.
233    //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
234    // Example Line from the confusables.txt source file:
235    //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
236    UnicodeString pattern(
237        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
238        "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
239           "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
240        "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
241        "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
242        "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
243        "|^(.*?)$", -1, US_INV);      // OR match any line, which catches illegal lines.
244    // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
245    fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
246
247    // Regular expression for parsing a hex number out of a space-separated list of them.
248    //   Capture group 1 gets the number, with spaces removed.
249    pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
250    fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
251
252    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
253    //   given the syntax of the input.
254    if (*fInput == 0xfeff) {
255        *fInput = 0x20;
256    }
257
258    // Parse the input, one line per iteration of this loop.
259    uregex_setText(fParseLine, fInput, inputLen, &status);
260    while (uregex_findNext(fParseLine, &status)) {
261        fLineNum++;
262        if (uregex_start(fParseLine, 7, &status) >= 0) {
263            // this was a blank or comment line.
264            continue;
265        }
266        if (uregex_start(fParseLine, 8, &status) >= 0) {
267            // input file syntax error.
268            status = U_PARSE_ERROR;
269            return;
270        }
271
272        // We have a good input line.  Extract the key character and mapping string, and
273        //    put them into the appropriate mapping table.
274        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
275                          uregex_end(fParseLine, 1, &status), status);
276
277        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
278        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
279        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);
280
281        UnicodeString  *mapString = new UnicodeString();
282        if (mapString == NULL) {
283            status = U_MEMORY_ALLOCATION_ERROR;
284            return;
285        }
286        while (uregex_findNext(fParseHexNum, &status)) {
287            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
288                                 uregex_end(fParseHexNum, 1, &status), status);
289            mapString->append(c);
290        }
291        U_ASSERT(mapString->length() >= 1);
292
293        // Put the map (value) string into the string pool
294        // This a little like a Java intern() - any duplicates will be eliminated.
295        SPUString *smapString = stringPool->addString(mapString, status);
296
297        // Add the UChar32 -> string mapping to the appropriate table.
298        UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
299                            uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
300                            uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
301                            uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
302                            NULL;
303        U_ASSERT(table != NULL);
304        uhash_iput(table, keyChar, smapString, &status);
305        fKeySet->add(keyChar);
306        if (U_FAILURE(status)) {
307            return;
308        }
309    }
310
311    // Input data is now all parsed and collected.
312    // Now create the run-time binary form of the data.
313    //
314    // This is done in two steps.  First the data is assembled into vectors and strings,
315    //   for ease of construction, then the contents of these collections are dumped
316    //   into the actual raw-bytes data storage.
317
318    // Build up the string array, and record the index of each string therein
319    //  in the (build time only) string pool.
320    // Strings of length one are not entered into the strings array.
321    // At the same time, build up the string lengths table, which records the
322    // position in the string table of the first string of each length >= 4.
323    // (Strings in the table are sorted by length)
324    stringPool->sort(status);
325    fStringTable = new UnicodeString();
326    fStringLengthsTable = new UVector(status);
327    int32_t previousStringLength = 0;
328    int32_t previousStringIndex  = 0;
329    int32_t poolSize = stringPool->size();
330    int32_t i;
331    for (i=0; i<poolSize; i++) {
332        SPUString *s = stringPool->getByIndex(i);
333        int32_t strLen = s->fStr->length();
334        int32_t strIndex = fStringTable->length();
335        U_ASSERT(strLen >= previousStringLength);
336        if (strLen == 1) {
337            // strings of length one do not get an entry in the string table.
338            // Keep the single string character itself here, which is the same
339            //  convention that is used in the final run-time string table index.
340            s->fStrTableIndex = s->fStr->charAt(0);
341        } else {
342            if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
343                fStringLengthsTable->addElement(previousStringIndex, status);
344                fStringLengthsTable->addElement(previousStringLength, status);
345            }
346            s->fStrTableIndex = strIndex;
347            fStringTable->append(*(s->fStr));
348        }
349        previousStringLength = strLen;
350        previousStringIndex  = strIndex;
351    }
352    // Make the final entry to the string lengths table.
353    //   (it holds an entry for the _last_ string of each length, so adding the
354    //    final one doesn't happen in the main loop because no longer string was encountered.)
355    if (previousStringLength >= 4) {
356        fStringLengthsTable->addElement(previousStringIndex, status);
357        fStringLengthsTable->addElement(previousStringLength, status);
358    }
359
360    // Construct the compile-time Key and Value tables
361    //
362    // For each key code point, check which mapping tables it applies to,
363    //   and create the final data for the key & value structures.
364    //
365    //   The four logical mapping tables are conflated into one combined table.
366    //   If multiple logical tables have the same mapping for some key, they
367    //     share a single entry in the combined table.
368    //   If more than one mapping exists for the same key code point, multiple
369    //     entries will be created in the table
370
371    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
372        // It is an oddity of the UnicodeSet API that simply enumerating the contained
373        //   code points requires a nested loop.
374        for (UChar32 keyChar=fKeySet->getRangeStart(range);
375                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
376            addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
377            addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
378            addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
379            addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
380        }
381    }
382
383    // Put the assembled data into the flat runtime array
384    outputData(status);
385
386    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
387    //  object  (this), and is deleted in the destructor.
388    return;
389}
390
391//
392// outputData     The confusable data has been compiled and stored in intermediate
393//                collections and strings.  Copy it from there to the final flat
394//                binary array.
395//
396//                Note that as each section is added to the output data, the
397//                expand (reserveSpace() function will likely relocate it in memory.
398//                Be careful with pointers.
399//
400void ConfusabledataBuilder::outputData(UErrorCode &status) {
401
402    U_ASSERT(fSpoofImpl->fSpoofData->fDataOwned == TRUE);
403
404    //  The Key Table
405    //     While copying the keys to the runtime array,
406    //       also sanity check that they are sorted.
407
408    int32_t numKeys = fKeyVec->size();
409    int32_t *keys =
410        static_cast<int32_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(int32_t), status));
411    if (U_FAILURE(status)) {
412        return;
413    }
414    int i;
415    int32_t previousKey = 0;
416    for (i=0; i<numKeys; i++) {
417        int32_t key =  fKeyVec->elementAti(i);
418        (void)previousKey;         // Suppress unused variable warning on gcc.
419        U_ASSERT((key & 0x00ffffff) >= (previousKey & 0x00ffffff));
420        U_ASSERT((key & 0xff000000) != 0);
421        keys[i] = key;
422        previousKey = key;
423    }
424    SpoofDataHeader *rawData = fSpoofImpl->fSpoofData->fRawData;
425    rawData->fCFUKeys = (int32_t)((char *)keys - (char *)rawData);
426    rawData->fCFUKeysSize = numKeys;
427    fSpoofImpl->fSpoofData->fCFUKeys = keys;
428
429
430    // The Value Table, parallels the key table
431    int32_t numValues = fValueVec->size();
432    U_ASSERT(numKeys == numValues);
433    uint16_t *values =
434        static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(uint16_t), status));
435    if (U_FAILURE(status)) {
436        return;
437    }
438    for (i=0; i<numValues; i++) {
439        uint32_t value = static_cast<uint32_t>(fValueVec->elementAti(i));
440        U_ASSERT(value < 0xffff);
441        values[i] = static_cast<uint16_t>(value);
442    }
443    rawData = fSpoofImpl->fSpoofData->fRawData;
444    rawData->fCFUStringIndex = (int32_t)((char *)values - (char *)rawData);
445    rawData->fCFUStringIndexSize = numValues;
446    fSpoofImpl->fSpoofData->fCFUValues = values;
447
448    // The Strings Table.
449
450    uint32_t stringsLength = fStringTable->length();
451    // Reserve an extra space so the string will be nul-terminated.  This is
452    // only a convenience, for when debugging; it is not needed otherwise.
453    UChar *strings =
454        static_cast<UChar *>(fSpoofImpl->fSpoofData->reserveSpace(stringsLength*sizeof(UChar)+2, status));
455    if (U_FAILURE(status)) {
456        return;
457    }
458    fStringTable->extract(strings, stringsLength+1, status);
459    rawData = fSpoofImpl->fSpoofData->fRawData;
460    U_ASSERT(rawData->fCFUStringTable == 0);
461    rawData->fCFUStringTable = (int32_t)((char *)strings - (char *)rawData);
462    rawData->fCFUStringTableLen = stringsLength;
463    fSpoofImpl->fSpoofData->fCFUStrings = strings;
464
465    // The String Lengths Table
466    //    While copying into the runtime array do some sanity checks on the values
467    //    Each complete entry contains two fields, an index and an offset.
468    //    Lengths should increase with each entry.
469    //    Offsets should be less than the size of the string table.
470    int32_t lengthTableLength = fStringLengthsTable->size();
471    uint16_t *stringLengths =
472        static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(lengthTableLength*sizeof(uint16_t), status));
473    if (U_FAILURE(status)) {
474        return;
475    }
476    int32_t destIndex = 0;
477    uint32_t previousLength = 0;
478    for (i=0; i<lengthTableLength; i+=2) {
479        uint32_t offset = static_cast<uint32_t>(fStringLengthsTable->elementAti(i));
480        uint32_t length = static_cast<uint32_t>(fStringLengthsTable->elementAti(i+1));
481        U_ASSERT(offset < stringsLength);
482        U_ASSERT(length < 40);
483        (void)previousLength;  // Suppress unused variable warning on gcc.
484        U_ASSERT(length > previousLength);
485        stringLengths[destIndex++] = static_cast<uint16_t>(offset);
486        stringLengths[destIndex++] = static_cast<uint16_t>(length);
487        previousLength = length;
488    }
489    rawData = fSpoofImpl->fSpoofData->fRawData;
490    rawData->fCFUStringLengths = (int32_t)((char *)stringLengths - (char *)rawData);
491    // Note: StringLengthsSize in the raw data is the number of complete entries,
492    //       each consisting of a pair of 16 bit values, hence the divide by 2.
493    rawData->fCFUStringLengthsSize = lengthTableLength / 2;
494    fSpoofImpl->fSpoofData->fCFUStringLengths =
495        reinterpret_cast<SpoofStringLengthsElement *>(stringLengths);
496}
497
498
499
500//  addKeyEntry   Construction of the confusable Key and Mapping Values tables.
501//                This is an intermediate point in the building process.
502//                We already have the mappings in the hash tables fSLTable, etc.
503//                This function builds corresponding run-time style table entries into
504//                  fKeyVec and fValueVec
505
506void ConfusabledataBuilder::addKeyEntry(
507    UChar32     keyChar,     // The key character
508    UHashtable *table,       // The table, one of SATable, MATable, etc.
509    int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
510    UErrorCode &status) {
511
512    SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(table, keyChar));
513    if (targetMapping == NULL) {
514        // No mapping for this key character.
515        //   (This function is called for all four tables for each key char that
516        //    is seen anywhere, so this no entry cases are very much expected.)
517        return;
518    }
519
520    // Check whether there is already an entry with the correct mapping.
521    // If so, simply set the flag in the keyTable saying that the existing entry
522    // applies to the table that we're doing now.
523
524    UBool keyHasMultipleValues = FALSE;
525    int32_t i;
526    for (i=fKeyVec->size()-1; i>=0 ; i--) {
527        int32_t key = fKeyVec->elementAti(i);
528        if ((key & 0x0ffffff) != keyChar) {
529            // We have now checked all existing key entries for this key char (if any)
530            //  without finding one with the same mapping.
531            break;
532        }
533        UnicodeString mapping = getMapping(i);
534        if (mapping == *(targetMapping->fStr)) {
535            // The run time entry we are currently testing has the correct mapping.
536            // Set the flag in it indicating that it applies to the new table also.
537            key |= tableFlag;
538            fKeyVec->setElementAt(key, i);
539            return;
540        }
541        keyHasMultipleValues = TRUE;
542    }
543
544    // Need to add a new entry to the binary data being built for this mapping.
545    // Includes adding entries to both the key table and the parallel values table.
546
547    int32_t newKey = keyChar | tableFlag;
548    if (keyHasMultipleValues) {
549        newKey |= USPOOF_KEY_MULTIPLE_VALUES;
550    }
551    int32_t adjustedMappingLength = targetMapping->fStr->length() - 1;
552    if (adjustedMappingLength>3) {
553        adjustedMappingLength = 3;
554    }
555    newKey |= adjustedMappingLength << USPOOF_KEY_LENGTH_SHIFT;
556
557    int32_t newData = targetMapping->fStrTableIndex;
558
559    fKeyVec->addElement(newKey, status);
560    fValueVec->addElement(newData, status);
561
562    // If the preceding key entry is for the same key character (but with a different mapping)
563    //   set the multiple-values flag on it.
564    if (keyHasMultipleValues) {
565        int32_t previousKeyIndex = fKeyVec->size() - 2;
566        int32_t previousKey = fKeyVec->elementAti(previousKeyIndex);
567        previousKey |= USPOOF_KEY_MULTIPLE_VALUES;
568        fKeyVec->setElementAt(previousKey, previousKeyIndex);
569    }
570}
571
572
573
574UnicodeString ConfusabledataBuilder::getMapping(int32_t index) {
575    int32_t key = fKeyVec->elementAti(index);
576    int32_t value = fValueVec->elementAti(index);
577    int32_t length = USPOOF_KEY_LENGTH_FIELD(key);
578    int32_t lastIndexWithLen;
579    switch (length) {
580      case 0:
581        return UnicodeString(static_cast<UChar>(value));
582      case 1:
583      case 2:
584        return UnicodeString(*fStringTable, value, length+1);
585      case 3:
586        length = 0;
587        int32_t i;
588        for (i=0; i<fStringLengthsTable->size(); i+=2) {
589            lastIndexWithLen = fStringLengthsTable->elementAti(i);
590            if (value <= lastIndexWithLen) {
591                length = fStringLengthsTable->elementAti(i+1);
592                break;
593            }
594        }
595        U_ASSERT(length>=3);
596        return UnicodeString(*fStringTable, value, length);
597      default:
598        U_ASSERT(FALSE);
599    }
600    return UnicodeString();
601}
602
603#endif
604#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
605
606