1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2013-2014, International Business Machines
6* Corporation and others.  All Rights Reserved.
7*******************************************************************************
8* collationsets.h
9*
10* created on: 2013feb09
11* created by: Markus W. Scherer
12*/
13
14#ifndef __COLLATIONSETS_H__
15#define __COLLATIONSETS_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_COLLATION
20
21#include "unicode/uniset.h"
22#include "collation.h"
23
24U_NAMESPACE_BEGIN
25
26struct CollationData;
27
28/**
29 * Finds the set of characters and strings that sort differently in the tailoring
30 * from the base data.
31 *
32 * Every mapping in the tailoring needs to be compared to the base,
33 * because some mappings are copied for optimization, and
34 * all contractions for a character are copied if any contractions for that character
35 * are added, modified or removed.
36 *
37 * It might be simpler to re-parse the rule string, but:
38 * - That would require duplicating some of the from-rules builder code.
39 * - That would make the runtime code depend on the builder.
40 * - That would only work if we have the rule string, and we allow users to
41 *   omit the rule string from data files.
42 */
43class TailoredSet : public UMemory {
44public:
45    TailoredSet(UnicodeSet *t)
46            : data(NULL), baseData(NULL),
47              tailored(t),
48              suffix(NULL),
49              errorCode(U_ZERO_ERROR) {}
50
51    void forData(const CollationData *d, UErrorCode &errorCode);
52
53    /**
54     * @return U_SUCCESS(errorCode) in C++, void in Java
55     * @internal only public for access by callback
56     */
57    UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58
59private:
60    void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
61    void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62    void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63
64    void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65    void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66    void addContractions(UChar32 c, const UChar *p);
67    void addSuffix(UChar32 c, const UnicodeString &sfx);
68    void add(UChar32 c);
69
70    /** Prefixes are reversed in the data structure. */
71    void setPrefix(const UnicodeString &pfx) {
72        unreversedPrefix = pfx;
73        unreversedPrefix.reverse();
74    }
75    void resetPrefix() {
76        unreversedPrefix.remove();
77    }
78
79    const CollationData *data;
80    const CollationData *baseData;
81    UnicodeSet *tailored;
82    UnicodeString unreversedPrefix;
83    const UnicodeString *suffix;
84    UErrorCode errorCode;
85};
86
87class ContractionsAndExpansions : public UMemory {
88public:
89    class CESink : public UMemory {
90    public:
91        virtual ~CESink();
92        virtual void handleCE(int64_t ce) = 0;
93        virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
94    };
95
96    ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
97            : data(NULL),
98              contractions(con), expansions(exp),
99              sink(s),
100              addPrefixes(prefixes),
101              checkTailored(0),
102              suffix(NULL),
103              errorCode(U_ZERO_ERROR) {}
104
105    void forData(const CollationData *d, UErrorCode &errorCode);
106    void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107
108    // all following: @internal, only public for access by callback
109
110    void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111
112    void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113    void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114
115    void addExpansions(UChar32 start, UChar32 end);
116    void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117
118    /** Prefixes are reversed in the data structure. */
119    void setPrefix(const UnicodeString &pfx) {
120        unreversedPrefix = pfx;
121        unreversedPrefix.reverse();
122    }
123    void resetPrefix() {
124        unreversedPrefix.remove();
125    }
126
127    const CollationData *data;
128    UnicodeSet *contractions;
129    UnicodeSet *expansions;
130    CESink *sink;
131    UBool addPrefixes;
132    int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
133    UnicodeSet tailored;
134    UnicodeSet ranges;
135    UnicodeString unreversedPrefix;
136    const UnicodeString *suffix;
137    int64_t ces[Collation::MAX_EXPANSION_LENGTH];
138    UErrorCode errorCode;
139};
140
141U_NAMESPACE_END
142
143#endif  // !UCONFIG_NO_COLLATION
144#endif  // __COLLATIONSETS_H__
145