// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationdatabuilder.h
*
* created on: 2012apr01
* created by: Markus W. Scherer
*/

14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONDATABUILDER_H__
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONDATABUILDER_H__
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uversion.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvector.h"
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct ConditionalCE32;
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationFastLatinBuilder;
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CopyHelper;
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass DataBuilderCollationIterator;
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCharsTrieBuilder;
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Low-level CollationData builder.
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Takes (character, CE) pairs and builds them into runtime data structures.
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Supports characters with context prefixes and contraction suffixes.
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationDataBuilder : public UObject {
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Collation element modifier. Interface class for a modifier
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * that changes a tailoring builder's temporary CEs to final CEs.
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Called for every non-special CE32 and every expansion CE.
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    class CEModifier : public UObject {
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    public:
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        virtual ~CEModifier();
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /** Returns a new CE to replace the non-special input CE32, or else Collation::NO_CE. */
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        virtual int64_t modifyCE32(uint32_t ce32) const = 0;
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /** Returns a new CE to replace the input CE, or else Collation::NO_CE. */
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        virtual int64_t modifyCE(int64_t ce) const = 0;
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationDataBuilder(UErrorCode &errorCode);
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~CollationDataBuilder();
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void initForTailoring(const CollationData *b, UErrorCode &errorCode);
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool isCompressibleLeadByte(uint32_t b) const;
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline UBool isCompressiblePrimary(uint32_t p) const {
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return isCompressibleLeadByte(p >> 24);
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if this builder has mappings (e.g., add() has been called)
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool hasMappings() const { return modified; }
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if c has CEs in this builder
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool isAssigned(UChar32 c) const;
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return the three-byte primary if c maps to a single such CE and has no context data,
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * otherwise returns 0.
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t getLongPrimaryIfSingleCE(UChar32 c) const;
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return the single CE for c.
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sets an error code if c does not have a single CE.
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t getSingleCE(UChar32 c, UErrorCode &errorCode) const;
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void add(const UnicodeString &prefix, const UnicodeString &s,
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius             const int64_t ces[], int32_t cesLength,
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius             UErrorCode &errorCode);
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Encodes the ces as either the returned ce32 by itself,
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * or by storing an expansion, with the returned ce32 referring to that.
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * add(p, s, ces, cesLength) = addCE32(p, s, encodeCEs(ces, cesLength))
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void addCE32(const UnicodeString &prefix, const UnicodeString &s,
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 uint32_t ce32, UErrorCode &errorCode);
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sets three-byte-primary CEs for a range of code points in code point order,
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * if it is worth doing; otherwise no change is made.
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * None of the code points in the range should have complex mappings so far
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * (expansions/contractions/prefixes).
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param start first code point
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param end last code point (inclusive)
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param primary primary weight for 'start'
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param step per-code point primary-weight increment
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param errorCode ICU in/out error code
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if an OFFSET_TAG range was used for start..end
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool maybeSetPrimaryRange(UChar32 start, UChar32 end,
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               uint32_t primary, int32_t step,
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               UErrorCode &errorCode);
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sets three-byte-primary CEs for a range of code points in code point order.
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sets range values if that is worth doing, or else individual values.
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * None of the code points in the range should have complex mappings so far
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * (expansions/contractions/prefixes).
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param start first code point
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param end last code point (inclusive)
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param primary primary weight for 'start'
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param step per-code point primary-weight increment
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param errorCode ICU in/out error code
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return the next primary after 'end': start primary incremented by ((end-start)+1)*step
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t setPrimaryRangeAndReturnNext(UChar32 start, UChar32 end,
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          uint32_t primary, int32_t step,
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          UErrorCode &errorCode);
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Copies all mappings from the src builder, with modifications.
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * This builder here must not be built yet, and should be empty.
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void copyFrom(const CollationDataBuilder &src, const CEModifier &modifier,
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  UErrorCode &errorCode);
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void optimize(const UnicodeSet &set, UErrorCode &errorCode);
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void suppressContractions(const UnicodeSet &set, UErrorCode &errorCode);
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void enableFastLatin() { fastLatinEnabled = TRUE; }
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void build(CollationData &data, UErrorCode &errorCode);
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Looks up CEs for s and appends them to the ces array.
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Does not handle normalization: s should be in FCD form.
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Does not write completely ignorable CEs.
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Does not write beyond Collation::MAX_EXPANSION_LENGTH.
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return incremented cesLength
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t getCEs(const UnicodeString &s, int64_t ces[], int32_t cesLength);
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t getCEs(const UnicodeString &prefix, const UnicodeString &s,
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                   int64_t ces[], int32_t cesLength);
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    friend class CopyHelper;
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    friend class DataBuilderCollationIterator;
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t getCE32FromOffsetCE32(UBool fromBase, UChar32 c, uint32_t ce32) const;
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t addCE(int64_t ce, UErrorCode &errorCode);
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t addCE32(uint32_t ce32, UErrorCode &errorCode);
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t addConditionalCE32(const UnicodeString &context, uint32_t ce32, UErrorCode &errorCode);
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline ConditionalCE32 *getConditionalCE32(int32_t index) const {
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return static_cast<ConditionalCE32 *>(conditionalCE32s[index]);
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline ConditionalCE32 *getConditionalCE32ForCE32(uint32_t ce32) const {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return getConditionalCE32(Collation::indexFromCE32(ce32));
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t makeBuilderContextCE32(int32_t index) {
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Collation::makeCE32FromTagAndIndex(Collation::BUILDER_DATA_TAG, index);
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool isBuilderContextCE32(uint32_t ce32) {
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG);
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t encodeOneCEAsCE32(int64_t ce);
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t encodeOneCE(int64_t ce, UErrorCode &errorCode);
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t encodeExpansion(const int64_t ces[], int32_t length, UErrorCode &errorCode);
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t encodeExpansion32(const int32_t newCE32s[], int32_t length, UErrorCode &errorCode);
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withContext, UErrorCode &errorCode);
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Copies base contractions to a list of ConditionalCE32.
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sets cond->next to the index of the first new item
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * and returns the index of the last new item.
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t copyContractionsFromBaseCE32(UnicodeString &context, UChar32 c, uint32_t ce32,
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                         ConditionalCE32 *cond, UErrorCode &errorCode);
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool getJamoCE32s(uint32_t jamoCE32s[], UErrorCode &errorCode);
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setDigitTags(UErrorCode &errorCode);
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setLeadSurrogates(UErrorCode &errorCode);
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void buildMappings(CollationData &data, UErrorCode &errorCode);
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void clearContexts();
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void buildContexts(UErrorCode &errorCode);
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t buildContext(ConditionalCE32 *head, UErrorCode &errorCode);
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t addContextTrie(uint32_t defaultCE32, UCharsTrieBuilder &trieBuilder,
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode);
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void buildFastLatinTable(CollationData &data, UErrorCode &errorCode);
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t getCEs(const UnicodeString &s, int32_t start, int64_t ces[], int32_t cesLength);
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UChar32 jamoCpFromIndex(int32_t i) {
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // 0 <= i < CollationData::JAMO_CE32S_LENGTH = 19 + 21 + 27
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i < Hangul::JAMO_L_COUNT) { return Hangul::JAMO_L_BASE + i; }
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i -= Hangul::JAMO_L_COUNT;
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i < Hangul::JAMO_V_COUNT) { return Hangul::JAMO_V_BASE + i; }
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i -= Hangul::JAMO_V_COUNT;
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // i < 27
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Hangul::JAMO_T_BASE + 1 + i;
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** @see Collation::BUILDER_DATA_TAG */
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t IS_BUILDER_JAMO_CE32 = 0x100;
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2Impl &nfcImpl;
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *base;
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationSettings *baseSettings;
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTrie2 *trie;
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector32 ce32s;
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector64 ce64s;
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector conditionalCE32s;  // vector of ConditionalCE32
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Characters that have context (prefixes or contraction suffixes).
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet contextChars;
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Serialized UCharsTrie structures for finalized contexts.
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString contexts;
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet unsafeBackwardSet;
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool modified;
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool fastLatinEnabled;
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationFastLatinBuilder *fastLatinBuilder;
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    DataBuilderCollationIterator *collIter;
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __COLLATIONDATABUILDER_H__
260