/*
**********************************************************************
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   06/07/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/unifilt.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "name2uni.h"
#include "patternprops.h"
#include "uprops.h"
#include "uinvchar.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(NameUnicodeTransliterator) 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar OPEN[] = {92,78,126,123,126,0}; // "\N~{~" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar OPEN_DELIM = 92; // '\\' first char of OPEN 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar CLOSE_DELIM = 125; // '}' 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar SPACE = 32; // ' ' 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// USetAdder implementation 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Does not use uset.h to reduce code dependencies 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_add(USet *set, UChar32 c) { 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uset_add(set, c); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// These functions aren't used. 
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*static void U_CALLCONV 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_addRange(USet *set, UChar32 start, UChar32 end) { 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((UnicodeSet *)set)->add(start, end); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_addString(USet *set, const UChar *str, int32_t length) { 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a transliterator with the default delimiters '{' and 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * '}'. 
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter) : 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(UNICODE_STRING("Name-Any", 8), adoptedFilter) { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet *legalPtr = &legal; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the legal character set 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru USetAdder sa = { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (USet *)legalPtr, // USet* == UnicodeSet* 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _set_add, 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, // Don't need _set_addRange 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, // Don't need _set_addString 7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NULL, // Don't need remove() 7285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NULL 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_getCharNameCharacters(&sa); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::~NameUnicodeTransliterator() {} 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliterator& o) : 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), legal(o.legal) {} 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*NameUnicodeTransliterator& NameUnicodeTransliterator::operator=( 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const NameUnicodeTransliterator& o) { 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::operator=(o); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // not necessary: the legal sets should all be the same -- legal=o.legal; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* NameUnicodeTransliterator::clone(void) const { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new NameUnicodeTransliterator(*this); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const { 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The failure mode, here and below, is to behave like Any-Null, 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if either there is no name data (max len == 0) or there is no 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // memory (malloc() => NULL). 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t maxLen = uprv_getMaxCharNameLength(); 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (maxLen == 0) { 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = offsets.limit; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Accomodate the longest possible name 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++maxLen; // allow for temporary trailing space 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char* cbuf = (char*) uprv_malloc(maxLen); 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cbuf == NULL) { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = offsets.limit; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString openPat(TRUE, OPEN, -1); 
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str, name; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t cursor = offsets.start; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = offsets.limit; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Modes: 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 0 - looking for open delimiter 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1 - after open delimiter 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t mode = 0; 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t openPos = -1; // open delim candidate pos 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (cursor < limit) { 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = text.char32At(cursor); 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (mode) { 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: // looking for open delimiter 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == OPEN_DELIM) { // quick check first 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru openPos = cursor; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::parsePattern(openPat, text, cursor, limit); 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i >= 0 && i < limit) { 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 1; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru name.truncate(0); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor = i; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; // *** reprocess char32At(cursor) 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: // after open delimiter 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Look for legal chars. If \s+ is found, convert it 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // to a single space. If closeDelimiter is found, exit 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the loop. If any other character is found, exit the 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // loop. If the limit is reached, exit the loop. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Convert \s+ => SPACE. This assumes there are no 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // runs of >1 space characters in names. 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (PatternProps::isWhiteSpace(c)) { 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore leading whitespace 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() > 0 && 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.charAt(name.length()-1) != SPACE) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.append(SPACE); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we are too long then abort. maxLen includes 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // temporary trailing space, so use '>'. 
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() > maxLen) { 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == CLOSE_DELIM) { 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len = name.length(); 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delete trailing space, if any 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (len > 0 && 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.charAt(len-1) == SPACE) { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --len; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (uprv_isInvariantUString(name.getBuffer(), len)) { 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.extract(0, len, cbuf, maxLen, US_INV); 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status); 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(status)) { 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Lookup succeeded 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // 
assert(U16_LENGTH(CLOSE_DELIM) == 1); 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor++; // advance over CLOSE_DELIM 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.truncate(0); 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.append(c); 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(openPos, cursor, str); 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Adjust indices for the change in the length of 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the string. Do not assume that str.length() == 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1, in case of surrogates. 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = cursor - openPos - str.length(); 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor -= delta; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit -= delta; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(cursor == openPos + str.length()); 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the lookup failed, we leave things as-is and 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // still switch to mode 0 and continue. 
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru openPos = -1; // close off candidate 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; // *** reprocess char32At(cursor) 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Check if c is a legal char. We assume here that 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // legal.contains(OPEN_DELIM) is FALSE, so when we abort a 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // name, we don't have to go back to openPos+1. 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (legal.contains(c)) { 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.append(c); 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we go past the longest possible name then abort. 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // maxLen includes temporary trailing space, so use '>='. 
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() >= maxLen) { 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Invalid character 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --cursor; // Backup and reprocess this character 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius cursor += U16_LENGTH(c); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.contextLimit += limit - offsets.limit; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.limit = limit; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // In incremental mode, only advance the cursor up to the last 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // open delimiter candidate. 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = (isIncremental && openPos >= 0) ? 
openPos : cursor; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(cbuf); 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 257