/*
**********************************************************************
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   06/07/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/unifilt.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "name2uni.h"
#include "patternprops.h"
#include "uprops.h"
#include "uinvchar.h"
#include "util.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(NameUnicodeTransliterator) 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar OPEN[] = {92,78,126,123,126,0}; // "\N~{~" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar OPEN_DELIM = 92; // '\\' first char of OPEN 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar CLOSE_DELIM = 125; // '}' 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar SPACE = 32; // ' ' 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// USetAdder implementation 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Does not use uset.h to reduce code dependencies 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_add(USet *set, UChar32 c) { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uset_add(set, c); 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// These functions aren't used. 
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*static void U_CALLCONV 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_addRange(USet *set, UChar32 start, UChar32 end) { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((UnicodeSet *)set)->add(start, end); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_set_addString(USet *set, const UChar *str, int32_t length) { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a transliterator with the default delimiters '{' and 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * '}'. 
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter) : 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(UNICODE_STRING("Name-Any", 8), adoptedFilter) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet *legalPtr = &legal; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the legal character set 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru USetAdder sa = { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (USet *)legalPtr, // USet* == UnicodeSet* 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _set_add, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, // Don't need _set_addRange 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, // Don't need _set_addString 7085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NULL, // Don't need remove() 7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NULL 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_getCharNameCharacters(&sa); 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::~NameUnicodeTransliterator() {} 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliterator& o) : 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), legal(o.legal) {} 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*NameUnicodeTransliterator& NameUnicodeTransliterator::operator=( 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const NameUnicodeTransliterator& o) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::operator=(o); 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // not necessary: the legal sets should all be the same -- legal=o.legal; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* NameUnicodeTransliterator::clone(void) const { 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new NameUnicodeTransliterator(*this); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const { 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The failure mode, here and below, is to behave like Any-Null, 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if either there is no name data (max len == 0) or there is no 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // memory (malloc() => NULL). 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t maxLen = uprv_getMaxCharNameLength(); 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (maxLen == 0) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = offsets.limit; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Accomodate the longest possible name 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++maxLen; // allow for temporary trailing space 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char* cbuf = (char*) uprv_malloc(maxLen); 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cbuf == NULL) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = offsets.limit; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString openPat(TRUE, OPEN, -1); 
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str, name; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t cursor = offsets.start; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = offsets.limit; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Modes: 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 0 - looking for open delimiter 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1 - after open delimiter 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t mode = 0; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t openPos = -1; // open delim candidate pos 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (cursor < limit) { 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = text.char32At(cursor); 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (mode) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: // looking for open delimiter 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == OPEN_DELIM) { // quick check first 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru openPos = cursor; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::parsePattern(openPat, text, cursor, limit); 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i >= 0 && i < limit) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 1; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru name.truncate(0); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor = i; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; // *** reprocess char32At(cursor) 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: // after open delimiter 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Look for legal chars. If \s+ is found, convert it 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // to a single space. If closeDelimiter is found, exit 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the loop. If any other character is found, exit the 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // loop. If the limit is reached, exit the loop. 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Convert \s+ => SPACE. This assumes there are no 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // runs of >1 space characters in names. 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (PatternProps::isWhiteSpace(c)) { 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore leading whitespace 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() > 0 && 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.charAt(name.length()-1) != SPACE) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.append(SPACE); 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we are too long then abort. maxLen includes 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // temporary trailing space, so use '>'. 
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() > maxLen) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == CLOSE_DELIM) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len = name.length(); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delete trailing space, if any 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (len > 0 && 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.charAt(len-1) == SPACE) { 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --len; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (uprv_isInvariantUString(name.getBuffer(), len)) { 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.extract(0, len, cbuf, maxLen, US_INV); 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status); 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(status)) { 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Lookup succeeded 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 
assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor++; // advance over CLOSE_DELIM 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.truncate(0); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.append(c); 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(openPos, cursor, str); 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Adjust indices for the change in the length of 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the string. Do not assume that str.length() == 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1, in case of surrogates. 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = cursor - openPos - str.length(); 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor -= delta; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit -= delta; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(cursor == openPos + str.length()); 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the lookup failed, we leave things as-is and 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // still switch to mode 0 and continue. 
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru openPos = -1; // close off candidate 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; // *** reprocess char32At(cursor) 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Check if c is a legal char. We assume here that 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // legal.contains(OPEN_DELIM) is FALSE, so when we abort a 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // name, we don't have to go back to openPos+1. 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (legal.contains(c)) { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.append(c); 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we go past the longest possible name then abort. 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // maxLen includes temporary trailing space, so use '>='. 
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name.length() >= maxLen) { 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Invalid character 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --cursor; // Backup and reprocess this character 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mode = 0; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor += UTF_CHAR_LENGTH(c); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.contextLimit += limit - offsets.limit; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.limit = limit; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // In incremental mode, only advance the cursor up to the last 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // open delimiter candidate. 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start = (isIncremental && openPos >= 0) ? 
openPos : cursor; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(cbuf); 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 256