1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationruleparser.cpp 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* (replaced the former ucol_tok.cpp) 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013apr10 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/normalizer2.h" 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/parseerr.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uloc.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utf16.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "charstr.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationruleparser.h" 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h" 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h" 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cstring.h" 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "patternprops.h" 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h" 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h" 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const UChar BEFORE[] = { 0x5b, 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0 }; // "[before" 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusconst int32_t BEFORE_LENGTH = 7; 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::~Sink() {} 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::suppressContractions(const UnicodeSet &, const char *&, UErrorCode &) {} 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::optimize(const UnicodeSet &, const char *&, UErrorCode &) {} 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Importer::~Importer() {} 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::CollationRuleParser(const CollationData *base, UErrorCode &errorCode) 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : nfd(*Normalizer2::getNFDInstance(errorCode)), 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius nfc(*Normalizer2::getNFCInstance(errorCode)), 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules(NULL), baseData(base), settings(NULL), 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError(NULL), errorReason(NULL), 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink(NULL), importer(NULL), 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex(0) { 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::~CollationRuleParser() { 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parse(const UnicodeString &ruleString, 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationSettings &outSettings, 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UParseError *outParseError, 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings = &outSettings; 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError = outParseError; 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError != NULL) { 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->line = 0; 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->offset = -1; 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->preContext[0] = 0; 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->postContext[0] = 0; 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorReason = NULL; 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parse(ruleString, errorCode); 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parse(const UnicodeString &ruleString, UErrorCode &errorCode) { 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules = &ruleString; 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = 0; 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(ruleIndex < rules->length()) { 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = rules->charAt(ruleIndex); 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(PatternProps::isWhiteSpace(c)) { 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++ruleIndex; 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius switch(c) { 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x26: // '&' 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseRuleChain(errorCode); 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x5b: // '[' 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseSetting(errorCode); 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x23: // '#' starts a comment, until the end of the line 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = skipComment(ruleIndex + 1); 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x40: // '@' is equivalent to [backwards 2] 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setFlag(CollationSettings::BACKWARD_SECONDARY, 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCOL_ON, 0, errorCode); 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++ruleIndex; 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x21: // '!' used to turn on Thai/Lao character reversal 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Accept but ignore. The root collator has contractions 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // that are equivalent to the character reversal, where appropriate. 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++ruleIndex; 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius default: 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("expected a reset or setting or comment", errorCode); 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRuleChain(UErrorCode &errorCode) { 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t resetStrength = parseResetAndPosition(errorCode); 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isFirstRelation = TRUE; 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t result = parseRelationOperator(errorCode); 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(result < 0) { 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ruleIndex < rules->length() && rules->charAt(ruleIndex) == 0x23) { 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // '#' starts a comment, until the end of the line 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = skipComment(ruleIndex + 1); 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isFirstRelation) { 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("reset not followed by a relation", errorCode); 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t strength = result & STRENGTH_MASK; 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(resetStrength < UCOL_IDENTICAL) { 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // reset-before rule chain 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isFirstRelation) { 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(strength != resetStrength) { 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("reset-before strength differs from its first relation", errorCode); 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(strength < resetStrength) { 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("reset-before strength followed by a stronger relation", errorCode); 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = ruleIndex + (result >> OFFSET_SHIFT); // skip over the relation operator 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((result & STARRED_FLAG) == 0) { 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseRelationStrings(strength, i, errorCode); 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseStarredCharacters(strength, i, errorCode); 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius isFirstRelation = FALSE; 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseResetAndPosition(UErrorCode &errorCode) { 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = skipWhiteSpace(ruleIndex + 1); 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t j; 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c; 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t resetStrength; 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rules->compare(i, BEFORE_LENGTH, BEFORE, 0, BEFORE_LENGTH) == 0 && 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (j = i + BEFORE_LENGTH) < rules->length() && 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius PatternProps::isWhiteSpace(rules->charAt(j)) && 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ((j = skipWhiteSpace(j + 1)) + 1) < rules->length() && 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x31 <= (c = rules->charAt(j)) && c <= 0x33 && 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules->charAt(j + 1) == 0x5d) { 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // &[before n] with n=1 or 2 or 3 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetStrength = UCOL_PRIMARY + (c - 0x31); 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = skipWhiteSpace(j + 2); 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetStrength = UCOL_IDENTICAL; 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i >= rules->length()) { 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("reset without position", errorCode); 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_DEFAULT; 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString str; 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rules->charAt(i) == 0x5b) { // '[' 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseSpecialPosition(i, str, errorCode); 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseTailoringString(i, str, errorCode); 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->addReset(resetStrength, str, errorReason, errorCode); 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { setErrorContext(); } 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = i; 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return resetStrength; 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRelationOperator(UErrorCode &errorCode) { 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = skipWhiteSpace(ruleIndex); 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ruleIndex >= rules->length()) { return UCOL_DEFAULT; } 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t strength; 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = ruleIndex; 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = rules->charAt(i++); 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius switch(c) { 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x3c: // '<' 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x3c) { // << 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x3c) { // <<< 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x3c) { // <<<< 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_QUATERNARY; 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_TERTIARY; 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_SECONDARY; 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_PRIMARY; 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x2a) { // '*' 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength |= STARRED_FLAG; 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x3b: // ';' same as << 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_SECONDARY; 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x2c: // ',' same as <<< 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_TERTIARY; 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x3d: // '=' 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength = UCOL_IDENTICAL; 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x2a) { // '*' 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius strength |= STARRED_FLAG; 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius default: 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_DEFAULT; 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ((i - ruleIndex) << OFFSET_SHIFT) | strength; 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode) { 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Parse 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // prefix | str / extension 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // where prefix and extension are optional. 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString prefix, str, extension; 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseTailoringString(i, str, errorCode); 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar next = (i < rules->length()) ? rules->charAt(i) : 0; 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(next == 0x7c) { // '|' separates the context prefix from the string. 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prefix = str; 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseTailoringString(i + 1, str, errorCode); 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius next = (i < rules->length()) ? rules->charAt(i) : 0; 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(next == 0x2f) { // '/' separates the string from the extension. 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseTailoringString(i + 1, extension, errorCode); 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!prefix.isEmpty()) { 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 prefix0 = prefix.char32At(0); 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = str.char32At(0); 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) { 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary", 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode); 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->addRelation(strength, prefix, str, extension, errorReason, errorCode); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { setErrorContext(); } 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = i; 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode) { 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString empty, raw; 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseString(skipWhiteSpace(i), raw, errorCode); 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw.isEmpty()) { 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("missing starred-relation string", errorCode); 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 prev = -1; 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t j = 0; 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(j < raw.length()) { 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = raw.char32At(j); 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!nfd.isInert(c)) { 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("starred-relation string is not all NFD-inert", errorCode); 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->addRelation(strength, empty, UnicodeString(c), empty, errorReason, errorCode); 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setErrorContext(); 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius j += U16_LENGTH(c); 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prev = c; 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i >= rules->length() || rules->charAt(i) != 0x2d) { // '-' 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(prev < 0) { 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("range without start in starred-relation string", errorCode); 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseString(i + 1, raw, errorCode); 321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw.isEmpty()) { 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("range without end in starred-relation string", errorCode); 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = raw.char32At(0); 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c < prev) { 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("range start greater than end in starred-relation string", errorCode); 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // range prev-c 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString s; 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(++prev <= c) { 334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!nfd.isInert(prev)) { 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("starred-relation string range is not all NFD-inert", errorCode); 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_IS_SURROGATE(prev)) { 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("starred-relation string range contains a surrogate", errorCode); 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(0xfffd <= prev && prev <= 0xffff) { 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF", errorCode); 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.setTo(prev); 347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->addRelation(strength, empty, s, empty, errorReason, errorCode); 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { 349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setErrorContext(); 350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prev = -1; 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius j = U16_LENGTH(c); 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = skipWhiteSpace(i); 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) { 361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = parseString(skipWhiteSpace(i), raw, errorCode); 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_SUCCESS(errorCode) && raw.isEmpty()) { 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("missing relation string", errorCode); 364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return skipWhiteSpace(i); 366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) { 370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return i; } 371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.remove(); 372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(i < rules->length()) { 373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = rules->charAt(i++); 374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isSyntaxChar(c)) { 375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c == 0x27) { // apostrophe 376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x27) { 377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Double apostrophe, encodes a single one. 378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append((UChar)0x27); 379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Quote literal text until the next single apostrophe. 383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == rules->length()) { 385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("quoted literal text missing terminating apostrophe", errorCode); 386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius c = rules->charAt(i++); 389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c == 0x27) { 390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < rules->length() && rules->charAt(i) == 0x27) { 391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Double apostrophe inside quoted literal text, 392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // still encodes a single apostrophe. 393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append((UChar)c); 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(c == 0x5c) { // backslash 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == rules->length()) { 402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("backslash escape at the end of the rule string", errorCode); 403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius c = rules->char32At(i); 406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append(c); 407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i += U16_LENGTH(c); 408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Any other syntax character terminates a string. 410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius --i; 411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(PatternProps::isWhiteSpace(c)) { 414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Unquoted white space terminates a string. 415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius --i; 416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append((UChar)c); 419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t j = 0; j < raw.length();) { 422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = raw.char32At(j); 423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_IS_SURROGATE(c)) { 424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("string contains an unpaired surrogate", errorCode); 425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(0xfffd <= c && c <= 0xffff) { 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("string contains U+FFFD, U+FFFE or U+FFFF", errorCode); 429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius j += U16_LENGTH(c); 432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const char *const positions[] = { 439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first tertiary ignorable", 440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last tertiary ignorable", 441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first secondary ignorable", 442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last secondary ignorable", 443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first primary ignorable", 444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last primary ignorable", 445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first variable", 446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last variable", 447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first regular", 448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last regular", 449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first implicit", 450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last implicit", 451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "first trailing", 452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "last trailing" 453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode) { 459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return 0; } 460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString raw; 461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t j = readWords(i + 1, raw); 462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(j > i && rules->charAt(j) == 0x5d && !raw.isEmpty()) { // words end with ] 463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++j; 464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(int32_t pos = 0; pos < UPRV_LENGTHOF(positions); ++pos) { 465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UnicodeString(positions[pos], -1, US_INV)) { 466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + pos)); 467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UNICODE_STRING_SIMPLE("top")) { 471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_REGULAR)); 472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UNICODE_STRING_SIMPLE("variable top")) { 475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_VARIABLE)); 476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("not a valid special reset position", errorCode); 480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseSetting(UErrorCode &errorCode) { 485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString raw; 487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = ruleIndex + 1; 488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t j = readWords(i, raw); 489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(j <= i || raw.isEmpty()) { 490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("expected a setting/option at '['", errorCode); 491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rules->charAt(j) == 0x5d) { // words end with ] 493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++j; 494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw.startsWith(UNICODE_STRING_SIMPLE("reorder")) && 495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (raw.length() == 7 || raw.charAt(7) == 0x20)) { 496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseReordering(raw, errorCode); 497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UNICODE_STRING_SIMPLE("backwards 2")) { 501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setFlag(CollationSettings::BACKWARD_SECONDARY, 502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCOL_ON, 0, errorCode); 503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString v; 507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t valueIndex = raw.lastIndexOf((UChar)0x20); 508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(valueIndex >= 0) { 509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius v.setTo(raw, valueIndex + 1); 510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.truncate(valueIndex); 511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UNICODE_STRING_SIMPLE("strength") && v.length() == 1) { 513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t value = UCOL_DEFAULT; 514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = v.charAt(0); 515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(0x31 <= c && c <= 0x34) { // 1..4 516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_PRIMARY + (c - 0x31); 517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(c == 0x49) { // 'I' 518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_IDENTICAL; 519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setStrength(value, 0, errorCode); 522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("alternate")) { 526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = UCOL_DEFAULT; 527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(v == UNICODE_STRING_SIMPLE("non-ignorable")) { 528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_NON_IGNORABLE; 529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("shifted")) { 530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_SHIFTED; 531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setAlternateHandling(value, 0, errorCode); 534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("maxVariable")) { 538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t value = UCOL_DEFAULT; 539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(v == UNICODE_STRING_SIMPLE("space")) { 540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = CollationSettings::MAX_VAR_SPACE; 541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("punct")) { 542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = CollationSettings::MAX_VAR_PUNCT; 543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("symbol")) { 544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = CollationSettings::MAX_VAR_SYMBOL; 545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("currency")) { 546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = CollationSettings::MAX_VAR_CURRENCY; 547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setMaxVariable(value, 0, errorCode); 550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->variableTop = baseData->getLastPrimaryForGroup( 551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCOL_REORDER_CODE_FIRST + value); 552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(settings->variableTop != 0); 553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("caseFirst")) { 557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = UCOL_DEFAULT; 558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(v == UNICODE_STRING_SIMPLE("off")) { 559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_OFF; 560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("lower")) { 561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_LOWER_FIRST; 562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(v == UNICODE_STRING_SIMPLE("upper")) { 563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = UCOL_UPPER_FIRST; 564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setCaseFirst(value, 0, errorCode); 567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("caseLevel")) { 571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = getOnOffValue(v); 572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setFlag(CollationSettings::CASE_LEVEL, value, 0, errorCode); 574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("normalization")) { 578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = getOnOffValue(v); 579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setFlag(CollationSettings::CHECK_FCD, value, 0, errorCode); 581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("numericOrdering")) { 585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = getOnOffValue(v); 586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->setFlag(CollationSettings::NUMERIC, value, 0, errorCode); 588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("hiraganaQ")) { 592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value = getOnOffValue(v); 593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value != UCOL_DEFAULT) { 594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(value == UCOL_ON) { 595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("[hiraganaQ on] is not supported", errorCode); 596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("import")) { 601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString lang; 602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius lang.appendInvariantChars(v, errorCode); 603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; } 604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // BCP 47 language tag -> ICU locale ID 605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius char localeID[ULOC_FULLNAME_CAPACITY]; 606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t parsedLength; 607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = uloc_forLanguageTag(lang.data(), localeID, ULOC_FULLNAME_CAPACITY, 608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius &parsedLength, &errorCode); 609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode) || 610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parsedLength != lang.length() || length >= ULOC_FULLNAME_CAPACITY) { 611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ZERO_ERROR; 612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("expected language tag in [import langTag]", errorCode); 613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // localeID minus all keywords 616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius char baseID[ULOC_FULLNAME_CAPACITY]; 617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = uloc_getBaseName(localeID, baseID, ULOC_FULLNAME_CAPACITY, &errorCode); 618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { 619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ZERO_ERROR; 620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("expected language tag in [import langTag]", errorCode); 621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length == 3 && uprv_memcmp(baseID, "und", 3) == 0) { 624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_strcpy(baseID, "root"); 625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // @collation=type, or length=0 if not specified 627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius char collationType[ULOC_KEYWORDS_CAPACITY]; 628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = uloc_getKeywordValue(localeID, "collation", 629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collationType, ULOC_KEYWORDS_CAPACITY, 630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius &errorCode); 631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { 632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ZERO_ERROR; 633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("expected language tag in [import langTag]", errorCode); 634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(importer == NULL) { 637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("[import langTag] is not supported", errorCode); 638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UnicodeString importedRules; 640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius importer->getRules(baseID, length > 0 ? collationType : "standard", 641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius importedRules, errorReason, errorCode); 642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { 643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorReason == NULL) { 644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorReason = "[import langTag] failed"; 645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setErrorContext(); 647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString *outerRules = rules; 650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t outerRuleIndex = ruleIndex; 651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius parse(importedRules, errorCode); 652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { 653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError != NULL) { 654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->offset = outerRuleIndex; 655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules = outerRules; 658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(rules->charAt(j) == 0x5b) { // words end with [ 663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet set; 664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius j = parseUnicodeSet(j, set, errorCode); 665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw == UNICODE_STRING_SIMPLE("optimize")) { 667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->optimize(set, errorReason, errorCode); 668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { setErrorContext(); } 669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(raw == UNICODE_STRING_SIMPLE("suppressContractions")) { 672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sink->suppressContractions(set, errorReason, errorCode); 673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { setErrorContext(); } 674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ruleIndex = j; 675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("not a valid setting/option", errorCode); 679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseReordering(const UnicodeString &raw, UErrorCode &errorCode) { 683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = 7; // after "reorder" 685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == raw.length()) { 686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // empty [reorder] with no codes 687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius settings->resetReordering(); 688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Parse the codes in [reorder aa bb cc]. 691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector32 reorderCodes(errorCode); 692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString word; 694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(i < raw.length()) { 695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; // skip the word-separating space 696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t limit = raw.indexOf((UChar)0x20, i); 697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(limit < 0) { limit = raw.length(); } 698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius word.clear().appendInvariantChars(raw.tempSubStringBetween(i, limit), errorCode); 699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t code = getReorderCode(word.data()); 701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(code < 0) { 702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("unknown script or reorder code", errorCode); 703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reorderCodes.addElement(code, errorCode); 706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = limit; 708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 7091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert settings->setReordering(*baseData, reorderCodes.getBuffer(), reorderCodes.size(), errorCode); 710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const char *const gSpecialReorderCodes[] = { 713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "space", "punct", "symbol", "currency", "digit" 714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::getReorderCode(const char *word) { 718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(int32_t i = 0; i < UPRV_LENGTHOF(gSpecialReorderCodes); ++i) { 719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(uprv_stricmp(word, gSpecialReorderCodes[i]) == 0) { 720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_REORDER_CODE_FIRST + i; 721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word); 724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(script >= 0) { 725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return script; 726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(uprv_stricmp(word, "others") == 0) { 728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return UCOL_REORDER_CODE_OTHERS; // same as Zzzz = USCRIPT_UNKNOWN 729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return -1; 731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUColAttributeValue 734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::getOnOffValue(const UnicodeString &s) { 735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(s == UNICODE_STRING_SIMPLE("on")) { 736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_ON; 737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(s == UNICODE_STRING_SIMPLE("off")) { 738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_OFF; 739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return UCOL_DEFAULT; 741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode) { 746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Collect a UnicodeSet pattern between a balanced pair of [brackets]. 747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t level = 0; 748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t j = i; 749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(j == rules->length()) { 751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("unbalanced UnicodeSet pattern brackets", errorCode); 752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = rules->charAt(j++); 755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c == 0x5b) { // '[' 756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++level; 757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(c == 0x5d) { // ']' 758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(--level == 0) { break; } 759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius set.applyPattern(rules->tempSubStringBetween(i, j), errorCode); 762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { 763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ZERO_ERROR; 764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("not a valid UnicodeSet pattern", errorCode); 765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius j = skipWhiteSpace(j); 768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(j == rules->length() || rules->charAt(j) != 0x5d) { 769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setParseError("missing option-terminating ']' after UnicodeSet pattern", errorCode); 770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return j; 771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ++j; 773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::readWords(int32_t i, UnicodeString &raw) const { 777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const UChar sp = 0x20; 778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.remove(); 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = skipWhiteSpace(i); 780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 781fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i >= rules->length()) { return 0; } 782fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = rules->charAt(i); 783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) { // syntax except -_ 784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw.isEmpty()) { return i; } 785fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(raw.endsWith(&sp, 1)) { // remove trailing space 786fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.truncate(raw.length() - 1); 787fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 788fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(PatternProps::isWhiteSpace(c)) { 791fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append(0x20); 792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i = skipWhiteSpace(i + 1); 793fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 794fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius raw.append(c); 795fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 796fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 797fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 798fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 799fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 800fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::skipComment(int32_t i) const { 802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // skip to past the newline 803fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(i < rules->length()) { 804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = rules->charAt(i++); 805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // LF or FF or CR or NEL or LS or PS 806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) { 807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS." 808fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // NLF (new line function) = CR or LF or CR+LF or NEL. 809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No need to collect all of CR+LF because a following LF will be ignored anyway. 810fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 814fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 815fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::setParseError(const char *reason, UErrorCode &errorCode) { 818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return; } 819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Error code consistent with the old parser (from ca. 2001), 820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // rather than U_PARSE_ERROR; 821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_INVALID_FORMAT_ERROR; 822fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorReason = reason; 823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError != NULL) { setErrorContext(); } 824fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::setErrorContext() { 828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError == NULL) { return; } 829fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 830fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Note: This relies on the calling code maintaining the ruleIndex 831fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // at a position that is useful for debugging. 832fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // For example, at the beginning of a reset or relation etc. 833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->offset = ruleIndex; 834fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->line = 0; // We are not counting line numbers. 835fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 836fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // before ruleIndex 837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1); 838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(start < 0) { 839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = 0; 840fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(start > 0 && U16_IS_TRAIL(rules->charAt(start))) { 841fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++start; 842fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 843fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = ruleIndex - start; 844fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules->extract(start, length, parseError->preContext); 845fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->preContext[length] = 0; 846fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 847fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // starting from ruleIndex 848fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = rules->length() - ruleIndex; 849fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(length >= U_PARSE_CONTEXT_LEN) { 850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = U_PARSE_CONTEXT_LEN - 1; 851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U16_IS_LEAD(rules->charAt(ruleIndex + length - 1))) { 852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius --length; 853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 854fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules->extract(ruleIndex, length, parseError->postContext); 856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError->postContext[length] = 0; 857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 859fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool 860fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::isSyntaxChar(UChar32 c) { 861fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 0x21 <= c && c <= 0x7e && 862fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (c <= 0x2f || (0x3a <= c && c <= 0x40) || 863fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (0x5b <= c && c <= 0x60) || (0x7b <= c)); 864fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 865fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 866fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 867fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::skipWhiteSpace(int32_t i) const { 868fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(i < rules->length() && PatternProps::isWhiteSpace(rules->charAt(i))) { 869fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 870fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 873fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 874fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 877