1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationruleparser.cpp
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* (replaced the former ucol_tok.cpp)
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013apr10
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/normalizer2.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/parseerr.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uloc.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utf16.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "charstr.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationruleparser.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h"
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cstring.h"
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "patternprops.h"
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h"
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const UChar BEFORE[] = { 0x5b, 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0 };  // "[before"
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusconst int32_t BEFORE_LENGTH = 7;
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::~Sink() {}
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::suppressContractions(const UnicodeSet &, const char *&, UErrorCode &) {}
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Sink::optimize(const UnicodeSet &, const char *&, UErrorCode &) {}
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::Importer::~Importer() {}
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::CollationRuleParser(const CollationData *base, UErrorCode &errorCode)
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        : nfd(*Normalizer2::getNFDInstance(errorCode)),
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          nfc(*Normalizer2::getNFCInstance(errorCode)),
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          rules(NULL), baseData(base), settings(NULL),
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          parseError(NULL), errorReason(NULL),
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          sink(NULL), importer(NULL),
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          ruleIndex(0) {
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::~CollationRuleParser() {
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parse(const UnicodeString &ruleString,
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           CollationSettings &outSettings,
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UParseError *outParseError,
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode) {
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings = &outSettings;
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseError = outParseError;
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(parseError != NULL) {
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        parseError->line = 0;
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        parseError->offset = -1;
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        parseError->preContext[0] = 0;
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        parseError->postContext[0] = 0;
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorReason = NULL;
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parse(ruleString, errorCode);
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parse(const UnicodeString &ruleString, UErrorCode &errorCode) {
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    rules = &ruleString;
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ruleIndex = 0;
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(ruleIndex < rules->length()) {
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = rules->charAt(ruleIndex);
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(PatternProps::isWhiteSpace(c)) {
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++ruleIndex;
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            continue;
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        switch(c) {
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x26:  // '&'
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseRuleChain(errorCode);
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x5b:  // '['
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseSetting(errorCode);
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x23:  // '#' starts a comment, until the end of the line
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ruleIndex = skipComment(ruleIndex + 1);
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x40:  // '@' is equivalent to [backwards 2]
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            settings->setFlag(CollationSettings::BACKWARD_SECONDARY,
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              UCOL_ON, 0, errorCode);
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++ruleIndex;
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x21:  // '!' used to turn on Thai/Lao character reversal
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Accept but ignore. The root collator has contractions
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // that are equivalent to the character reversal, where appropriate.
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++ruleIndex;
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        default:
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("expected a reset or setting or comment", errorCode);
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRuleChain(UErrorCode &errorCode) {
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t resetStrength = parseResetAndPosition(errorCode);
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool isFirstRelation = TRUE;
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t result = parseRelationOperator(errorCode);
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(result < 0) {
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(ruleIndex < rules->length() && rules->charAt(ruleIndex) == 0x23) {
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // '#' starts a comment, until the end of the line
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = skipComment(ruleIndex + 1);
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(isFirstRelation) {
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("reset not followed by a relation", errorCode);
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t strength = result & STRENGTH_MASK;
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(resetStrength < UCOL_IDENTICAL) {
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // reset-before rule chain
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(isFirstRelation) {
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(strength != resetStrength) {
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    setParseError("reset-before strength differs from its first relation", errorCode);
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return;
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(strength < resetStrength) {
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    setParseError("reset-before strength followed by a stronger relation", errorCode);
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return;
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t i = ruleIndex + (result >> OFFSET_SHIFT);  // skip over the relation operator
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((result & STARRED_FLAG) == 0) {
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseRelationStrings(strength, i, errorCode);
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseStarredCharacters(strength, i, errorCode);
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        isFirstRelation = FALSE;
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseResetAndPosition(UErrorCode &errorCode) {
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i = skipWhiteSpace(ruleIndex + 1);
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t j;
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar c;
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t resetStrength;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(rules->compare(i, BEFORE_LENGTH, BEFORE, 0, BEFORE_LENGTH) == 0 &&
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (j = i + BEFORE_LENGTH) < rules->length() &&
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            PatternProps::isWhiteSpace(rules->charAt(j)) &&
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ((j = skipWhiteSpace(j + 1)) + 1) < rules->length() &&
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            0x31 <= (c = rules->charAt(j)) && c <= 0x33 &&
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            rules->charAt(j + 1) == 0x5d) {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // &[before n] with n=1 or 2 or 3
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        resetStrength = UCOL_PRIMARY + (c - 0x31);
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = skipWhiteSpace(j + 2);
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        resetStrength = UCOL_IDENTICAL;
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(i >= rules->length()) {
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("reset without position", errorCode);
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return UCOL_DEFAULT;
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString str;
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(rules->charAt(i) == 0x5b) {  // '['
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = parseSpecialPosition(i, str, errorCode);
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = parseTailoringString(i, str, errorCode);
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    sink->addReset(resetStrength, str, errorReason, errorCode);
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { setErrorContext(); }
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ruleIndex = i;
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return resetStrength;
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRelationOperator(UErrorCode &errorCode) {
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ruleIndex = skipWhiteSpace(ruleIndex);
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ruleIndex >= rules->length()) { return UCOL_DEFAULT; }
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t strength;
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i = ruleIndex;
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar c = rules->charAt(i++);
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    switch(c) {
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    case 0x3c:  // '<'
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i < rules->length() && rules->charAt(i) == 0x3c) {  // <<
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(i < rules->length() && rules->charAt(i) == 0x3c) {  // <<<
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++i;
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(i < rules->length() && rules->charAt(i) == 0x3c) {  // <<<<
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ++i;
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    strength = UCOL_QUATERNARY;
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    strength = UCOL_TERTIARY;
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                strength = UCOL_SECONDARY;
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            strength = UCOL_PRIMARY;
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i < rules->length() && rules->charAt(i) == 0x2a) {  // '*'
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            strength |= STARRED_FLAG;
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        break;
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    case 0x3b:  // ';' same as <<
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        strength = UCOL_SECONDARY;
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        break;
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    case 0x2c:  // ',' same as <<<
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        strength = UCOL_TERTIARY;
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        break;
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    case 0x3d:  // '='
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        strength = UCOL_IDENTICAL;
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i < rules->length() && rules->charAt(i) == 0x2a) {  // '*'
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            strength |= STARRED_FLAG;
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        break;
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    default:
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return UCOL_DEFAULT;
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return ((i - ruleIndex) << OFFSET_SHIFT) | strength;
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode) {
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Parse
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //     prefix | str / extension
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // where prefix and extension are optional.
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString prefix, str, extension;
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    i = parseTailoringString(i, str, errorCode);
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar next = (i < rules->length()) ? rules->charAt(i) : 0;
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(next == 0x7c) {  // '|' separates the context prefix from the string.
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prefix = str;
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = parseTailoringString(i + 1, str, errorCode);
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        next = (i < rules->length()) ? rules->charAt(i) : 0;
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(next == 0x2f) {  // '/' separates the string from the extension.
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = parseTailoringString(i + 1, extension, errorCode);
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!prefix.isEmpty()) {
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 prefix0 = prefix.char32At(0);
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = str.char32At(0);
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!nfc.hasBoundaryBefore(prefix0) || !nfc.hasBoundaryBefore(c)) {
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("in 'prefix|str', prefix and str must each start with an NFC boundary",
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          errorCode);
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    sink->addRelation(strength, prefix, str, extension, errorReason, errorCode);
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { setErrorContext(); }
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ruleIndex = i;
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode) {
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString empty, raw;
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    i = parseString(skipWhiteSpace(i), raw, errorCode);
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(raw.isEmpty()) {
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("missing starred-relation string", errorCode);
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 prev = -1;
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t j = 0;
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(j < raw.length()) {
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar32 c = raw.char32At(j);
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!nfd.isInert(c)) {
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("starred-relation string is not all NFD-inert", errorCode);
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sink->addRelation(strength, empty, UnicodeString(c), empty, errorReason, errorCode);
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode)) {
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setErrorContext();
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            j += U16_LENGTH(c);
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            prev = c;
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i >= rules->length() || rules->charAt(i) != 0x2d) {  // '-'
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(prev < 0) {
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("range without start in starred-relation string", errorCode);
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = parseString(i + 1, raw, errorCode);
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw.isEmpty()) {
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("range without end in starred-relation string", errorCode);
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = raw.char32At(0);
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c < prev) {
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("range start greater than end in starred-relation string", errorCode);
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // range prev-c
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString s;
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(++prev <= c) {
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!nfd.isInert(prev)) {
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("starred-relation string range is not all NFD-inert", errorCode);
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_IS_SURROGATE(prev)) {
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("starred-relation string range contains a surrogate", errorCode);
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(0xfffd <= prev && prev <= 0xffff) {
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF", errorCode);
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s.setTo(prev);
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sink->addRelation(strength, empty, s, empty, errorReason, errorCode);
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode)) {
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setErrorContext();
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prev = -1;
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        j = U16_LENGTH(c);
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ruleIndex = skipWhiteSpace(i);
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) {
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    i = parseString(skipWhiteSpace(i), raw, errorCode);
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_SUCCESS(errorCode) && raw.isEmpty()) {
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("missing relation string", errorCode);
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return skipWhiteSpace(i);
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode) {
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return i; }
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    raw.remove();
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(i < rules->length()) {
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = rules->charAt(i++);
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isSyntaxChar(c)) {
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(c == 0x27) {  // apostrophe
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(i < rules->length() && rules->charAt(i) == 0x27) {
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Double apostrophe, encodes a single one.
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    raw.append((UChar)0x27);
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ++i;
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    continue;
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Quote literal text until the next single apostrophe.
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                for(;;) {
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(i == rules->length()) {
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        setParseError("quoted literal text missing terminating apostrophe", errorCode);
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        return i;
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    c = rules->charAt(i++);
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(c == 0x27) {
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(i < rules->length() && rules->charAt(i) == 0x27) {
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            // Double apostrophe inside quoted literal text,
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            // still encodes a single apostrophe.
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            ++i;
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        } else {
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            break;
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    raw.append((UChar)c);
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(c == 0x5c) {  // backslash
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(i == rules->length()) {
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    setParseError("backslash escape at the end of the rule string", errorCode);
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return i;
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                c = rules->char32At(i);
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                raw.append(c);
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                i += U16_LENGTH(c);
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Any other syntax character terminates a string.
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                --i;
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                break;
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(PatternProps::isWhiteSpace(c)) {
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Unquoted white space terminates a string.
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            --i;
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            raw.append((UChar)c);
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t j = 0; j < raw.length();) {
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = raw.char32At(j);
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_IS_SURROGATE(c)) {
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("string contains an unpaired surrogate", errorCode);
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return i;
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(0xfffd <= c && c <= 0xffff) {
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("string contains U+FFFD, U+FFFE or U+FFFF", errorCode);
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return i;
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        j += U16_LENGTH(c);
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return i;
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
namespace {

// Names of the special reset positions accepted inside [brackets].
// The order is significant: parseSpecialPosition() encodes a match as
// POS_BASE plus the index of the matching entry in this table.
const char *const positions[] = {
    "first tertiary ignorable",  "last tertiary ignorable",
    "first secondary ignorable", "last secondary ignorable",
    "first primary ignorable",   "last primary ignorable",
    "first variable",            "last variable",
    "first regular",             "last regular",
    "first implicit",            "last implicit",
    "first trailing",            "last trailing"
};

}  // namespace
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode) {
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return 0; }
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString raw;
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t j = readWords(i + 1, raw);
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(j > i && rules->charAt(j) == 0x5d && !raw.isEmpty()) {  // words end with ]
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++j;
464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        for(int32_t pos = 0; pos < UPRV_LENGTHOF(positions); ++pos) {
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(raw == UnicodeString(positions[pos], -1, US_INV)) {
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + pos));
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return j;
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw == UNICODE_STRING_SIMPLE("top")) {
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_REGULAR));
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return j;
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw == UNICODE_STRING_SIMPLE("variable top")) {
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            str.setTo((UChar)POS_LEAD).append((UChar)(POS_BASE + LAST_VARIABLE));
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return j;
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setParseError("not a valid special reset position", errorCode);
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return i;
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseSetting(UErrorCode &errorCode) {
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString raw;
487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i = ruleIndex + 1;
488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t j = readWords(i, raw);
489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(j <= i || raw.isEmpty()) {
490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("expected a setting/option at '['", errorCode);
491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(rules->charAt(j) == 0x5d) {  // words end with ]
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++j;
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw.startsWith(UNICODE_STRING_SIMPLE("reorder")) &&
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                (raw.length() == 7 || raw.charAt(7) == 0x20)) {
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseReordering(raw, errorCode);
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ruleIndex = j;
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw == UNICODE_STRING_SIMPLE("backwards 2")) {
501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            settings->setFlag(CollationSettings::BACKWARD_SECONDARY,
502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              UCOL_ON, 0, errorCode);
503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ruleIndex = j;
504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString v;
507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t valueIndex = raw.lastIndexOf((UChar)0x20);
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(valueIndex >= 0) {
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            v.setTo(raw, valueIndex + 1);
510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            raw.truncate(valueIndex);
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw == UNICODE_STRING_SIMPLE("strength") && v.length() == 1) {
513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t value = UCOL_DEFAULT;
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar c = v.charAt(0);
515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(0x31 <= c && c <= 0x34) {  // 1..4
516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_PRIMARY + (c - 0x31);
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(c == 0x49) {  // 'I'
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_IDENTICAL;
519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setStrength(value, 0, errorCode);
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("alternate")) {
526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = UCOL_DEFAULT;
527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(v == UNICODE_STRING_SIMPLE("non-ignorable")) {
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_NON_IGNORABLE;
529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("shifted")) {
530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_SHIFTED;
531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setAlternateHandling(value, 0, errorCode);
534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("maxVariable")) {
538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t value = UCOL_DEFAULT;
539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(v == UNICODE_STRING_SIMPLE("space")) {
540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = CollationSettings::MAX_VAR_SPACE;
541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("punct")) {
542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = CollationSettings::MAX_VAR_PUNCT;
543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("symbol")) {
544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = CollationSettings::MAX_VAR_SYMBOL;
545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("currency")) {
546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = CollationSettings::MAX_VAR_CURRENCY;
547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setMaxVariable(value, 0, errorCode);
550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->variableTop = baseData->getLastPrimaryForGroup(
551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    UCOL_REORDER_CODE_FIRST + value);
552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                U_ASSERT(settings->variableTop != 0);
553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("caseFirst")) {
557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = UCOL_DEFAULT;
558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(v == UNICODE_STRING_SIMPLE("off")) {
559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_OFF;
560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("lower")) {
561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_LOWER_FIRST;
562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(v == UNICODE_STRING_SIMPLE("upper")) {
563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                value = UCOL_UPPER_FIRST;
564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setCaseFirst(value, 0, errorCode);
567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("caseLevel")) {
571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = getOnOffValue(v);
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setFlag(CollationSettings::CASE_LEVEL, value, 0, errorCode);
574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("normalization")) {
578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = getOnOffValue(v);
579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setFlag(CollationSettings::CHECK_FCD, value, 0, errorCode);
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("numericOrdering")) {
585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = getOnOffValue(v);
586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                settings->setFlag(CollationSettings::NUMERIC, value, 0, errorCode);
588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("hiraganaQ")) {
592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UColAttributeValue value = getOnOffValue(v);
593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(value != UCOL_DEFAULT) {
594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(value == UCOL_ON) {
595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    setParseError("[hiraganaQ on] is not supported", errorCode);
596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("import")) {
601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            CharString lang;
602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            lang.appendInvariantChars(v, errorCode);
603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; }
604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // BCP 47 language tag -> ICU locale ID
605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            char localeID[ULOC_FULLNAME_CAPACITY];
606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t parsedLength;
607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t length = uloc_forLanguageTag(lang.data(), localeID, ULOC_FULLNAME_CAPACITY,
608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                 &parsedLength, &errorCode);
609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode) ||
610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    parsedLength != lang.length() || length >= ULOC_FULLNAME_CAPACITY) {
611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_ZERO_ERROR;
612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("expected language tag in [import langTag]", errorCode);
613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // localeID minus all keywords
616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            char baseID[ULOC_FULLNAME_CAPACITY];
617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = uloc_getBaseName(localeID, baseID, ULOC_FULLNAME_CAPACITY, &errorCode);
618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) {
619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_ZERO_ERROR;
620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("expected language tag in [import langTag]", errorCode);
621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if(length == 3 && uprv_memcmp(baseID, "und", 3) == 0) {
624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                uprv_strcpy(baseID, "root");
625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            }
626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // @collation=type, or length=0 if not specified
627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            char collationType[ULOC_KEYWORDS_CAPACITY];
628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = uloc_getKeywordValue(localeID, "collation",
629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          collationType, ULOC_KEYWORDS_CAPACITY,
630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          &errorCode);
631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) {
632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_ZERO_ERROR;
633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("expected language tag in [import langTag]", errorCode);
634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(importer == NULL) {
637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                setParseError("[import langTag] is not supported", errorCode);
638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                UnicodeString importedRules;
640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                importer->getRules(baseID, length > 0 ? collationType : "standard",
641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                   importedRules, errorReason, errorCode);
642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(U_FAILURE(errorCode)) {
643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(errorReason == NULL) {
644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errorReason = "[import langTag] failed";
645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    setErrorContext();
647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return;
648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                const UnicodeString *outerRules = rules;
650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                int32_t outerRuleIndex = ruleIndex;
651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                parse(importedRules, errorCode);
652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(U_FAILURE(errorCode)) {
653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(parseError != NULL) {
654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        parseError->offset = outerRuleIndex;
655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                rules = outerRules;
658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ruleIndex = j;
659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(rules->charAt(j) == 0x5b) {  // words end with [
663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeSet set;
664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        j = parseUnicodeSet(j, set, errorCode);
665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(raw == UNICODE_STRING_SIMPLE("optimize")) {
667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sink->optimize(set, errorReason, errorCode);
668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode)) { setErrorContext(); }
669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ruleIndex = j;
670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(raw == UNICODE_STRING_SIMPLE("suppressContractions")) {
672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sink->suppressContractions(set, errorReason, errorCode);
673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_FAILURE(errorCode)) { setErrorContext(); }
674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ruleIndex = j;
675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setParseError("not a valid setting/option", errorCode);
679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseReordering(const UnicodeString &raw, UErrorCode &errorCode) {
683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i = 7;  // after "reorder"
685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(i == raw.length()) {
686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // empty [reorder] with no codes
687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        settings->resetReordering();
688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Parse the codes in [reorder aa bb cc].
691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector32 reorderCodes(errorCode);
692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CharString word;
694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(i < raw.length()) {
695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++i;  // skip the word-separating space
696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t limit = raw.indexOf((UChar)0x20, i);
697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(limit < 0) { limit = raw.length(); }
698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        word.clear().appendInvariantChars(raw.tempSubStringBetween(i, limit), errorCode);
699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t code = getReorderCode(word.data());
701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(code < 0) {
702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("unknown script or reorder code", errorCode);
703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodes.addElement(code, errorCode);
706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i = limit;
708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
7091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    settings->setReordering(*baseData, reorderCodes.getBuffer(), reorderCodes.size(), errorCode);
710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
// Names of the special reorder groups. The order is significant:
// getReorderCode() returns UCOL_REORDER_CODE_FIRST plus the index of the
// matching entry.
static const char *const gSpecialReorderCodes[] = {
    "space",
    "punct",
    "symbol",
    "currency",
    "digit"
};
715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::getReorderCode(const char *word) {
718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(int32_t i = 0; i < UPRV_LENGTHOF(gSpecialReorderCodes); ++i) {
719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(uprv_stricmp(word, gSpecialReorderCodes[i]) == 0) {
720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return UCOL_REORDER_CODE_FIRST + i;
721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(script >= 0) {
725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return script;
726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(uprv_stricmp(word, "others") == 0) {
728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return UCOL_REORDER_CODE_OTHERS;  // same as Zzzz = USCRIPT_UNKNOWN
729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return -1;
731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUColAttributeValue
734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::getOnOffValue(const UnicodeString &s) {
735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(s == UNICODE_STRING_SIMPLE("on")) {
736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return UCOL_ON;
737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(s == UNICODE_STRING_SIMPLE("off")) {
738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return UCOL_OFF;
739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return UCOL_DEFAULT;
741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode) {
746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Collect a UnicodeSet pattern between a balanced pair of [brackets].
747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t level = 0;
748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t j = i;
749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(j == rules->length()) {
751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setParseError("unbalanced UnicodeSet pattern brackets", errorCode);
752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return j;
753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = rules->charAt(j++);
755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c == 0x5b) {  // '['
756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++level;
757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(c == 0x5d) {  // ']'
758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(--level == 0) { break; }
759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    set.applyPattern(rules->tempSubStringBetween(i, j), errorCode);
762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) {
763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_ZERO_ERROR;
764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("not a valid UnicodeSet pattern", errorCode);
765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return j;
766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    j = skipWhiteSpace(j);
768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(j == rules->length() || rules->charAt(j) != 0x5d) {
769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setParseError("missing option-terminating ']' after UnicodeSet pattern", errorCode);
770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return j;
771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return ++j;
773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::readWords(int32_t i, UnicodeString &raw) const {
777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const UChar sp = 0x20;
778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    raw.remove();
779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    i = skipWhiteSpace(i);
780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
781fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i >= rules->length()) { return 0; }
782fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = rules->charAt(i);
783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isSyntaxChar(c) && c != 0x2d && c != 0x5f) {  // syntax except -_
784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(raw.isEmpty()) { return i; }
785fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(raw.endsWith(&sp, 1)) {  // remove trailing space
786fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                raw.truncate(raw.length() - 1);
787fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
788fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return i;
789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(PatternProps::isWhiteSpace(c)) {
791fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            raw.append(0x20);
792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            i = skipWhiteSpace(i + 1);
793fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
794fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            raw.append(c);
795fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
796fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
797fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
798fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
799fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
800fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::skipComment(int32_t i) const {
802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // skip to past the newline
803fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(i < rules->length()) {
804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = rules->charAt(i++);
805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // LF or FF or CR or NEL or LS or PS
806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c == 0xa || c == 0xc || c == 0xd || c == 0x85 || c == 0x2028 || c == 0x2029) {
807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Unicode Newline Guidelines: "A readline function should stop at NLF, LS, FF, or PS."
808fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // NLF (new line function) = CR or LF or CR+LF or NEL.
809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // No need to collect all of CR+LF because a following LF will be ignored anyway.
810fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return i;
814fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
815fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::setParseError(const char *reason, UErrorCode &errorCode) {
818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Error code consistent with the old parser (from ca. 2001),
820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // rather than U_PARSE_ERROR;
821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode = U_INVALID_FORMAT_ERROR;
822fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorReason = reason;
823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(parseError != NULL) { setErrorContext(); }
824fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::setErrorContext() {
828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(parseError == NULL) { return; }
829fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
830fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Note: This relies on the calling code maintaining the ruleIndex
831fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // at a position that is useful for debugging.
832fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // For example, at the beginning of a reset or relation etc.
833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseError->offset = ruleIndex;
834fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseError->line = 0;  // We are not counting line numbers.
835fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
836fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // before ruleIndex
837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t start = ruleIndex - (U_PARSE_CONTEXT_LEN - 1);
838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(start < 0) {
839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = 0;
840fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(start > 0 && U16_IS_TRAIL(rules->charAt(start))) {
841fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++start;
842fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
843fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length = ruleIndex - start;
844fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    rules->extract(start, length, parseError->preContext);
845fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseError->preContext[length] = 0;
846fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
847fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // starting from ruleIndex
848fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = rules->length() - ruleIndex;
849fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= U_PARSE_CONTEXT_LEN) {
850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = U_PARSE_CONTEXT_LEN - 1;
851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U16_IS_LEAD(rules->charAt(ruleIndex + length - 1))) {
852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            --length;
853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
854fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    rules->extract(ruleIndex, length, parseError->postContext);
856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseError->postContext[length] = 0;
857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
859fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool
860fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::isSyntaxChar(UChar32 c) {
861fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return 0x21 <= c && c <= 0x7e &&
862fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (c <= 0x2f || (0x3a <= c && c <= 0x40) ||
863fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (0x5b <= c && c <= 0x60) || (0x7b <= c));
864fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
865fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
866fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
867fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationRuleParser::skipWhiteSpace(int32_t i) const {
868fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(i < rules->length() && PatternProps::isWhiteSpace(rules->charAt(i))) {
869fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++i;
870fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return i;
872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
873fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
874fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
877