1/*
2*******************************************************************************
3* Copyright (C) 2013-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* collationsets.h
7*
8* created on: 2013feb09
9* created by: Markus W. Scherer
10*/
11
12#ifndef __COLLATIONSETS_H__
13#define __COLLATIONSETS_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/uniset.h"
20#include "collation.h"
21
22U_NAMESPACE_BEGIN
23
24struct CollationData;
25
26/**
27 * Finds the set of characters and strings that sort differently in the tailoring
28 * from the base data.
29 *
30 * Every mapping in the tailoring needs to be compared to the base,
31 * because some mappings are copied for optimization, and
32 * all contractions for a character are copied if any contractions for that character
33 * are added, modified or removed.
34 *
35 * It might be simpler to re-parse the rule string, but:
36 * - That would require duplicating some of the from-rules builder code.
37 * - That would make the runtime code depend on the builder.
38 * - That would only work if we have the rule string, and we allow users to
39 *   omit the rule string from data files.
40 */
41class TailoredSet : public UMemory {
42public:
43    TailoredSet(UnicodeSet *t)
44            : data(NULL), baseData(NULL),
45              tailored(t),
46              suffix(NULL),
47              errorCode(U_ZERO_ERROR) {}
48
49    void forData(const CollationData *d, UErrorCode &errorCode);
50
51    /**
52     * @return U_SUCCESS(errorCode) in C++, void in Java
53     * @internal only public for access by callback
54     */
55    UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
56
57private:
58    void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
59    void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
60    void compareContractions(UChar32 c, const UChar *p, const UChar *q);
61
62    void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
63    void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
64    void addContractions(UChar32 c, const UChar *p);
65    void addSuffix(UChar32 c, const UnicodeString &sfx);
66    void add(UChar32 c);
67
68    /** Prefixes are reversed in the data structure. */
69    void setPrefix(const UnicodeString &pfx) {
70        unreversedPrefix = pfx;
71        unreversedPrefix.reverse();
72    }
73    void resetPrefix() {
74        unreversedPrefix.remove();
75    }
76
77    const CollationData *data;
78    const CollationData *baseData;
79    UnicodeSet *tailored;
80    UnicodeString unreversedPrefix;
81    const UnicodeString *suffix;
82    UErrorCode errorCode;
83};
84
85class ContractionsAndExpansions : public UMemory {
86public:
87    class CESink : public UMemory {
88    public:
89        virtual ~CESink();
90        virtual void handleCE(int64_t ce) = 0;
91        virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
92    };
93
94    ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
95            : data(NULL),
96              contractions(con), expansions(exp),
97              sink(s),
98              addPrefixes(prefixes),
99              checkTailored(0),
100              suffix(NULL),
101              errorCode(U_ZERO_ERROR) {}
102
103    void forData(const CollationData *d, UErrorCode &errorCode);
104    void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
105
106    // all following: @internal, only public for access by callback
107
108    void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
109
110    void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
111    void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
112
113    void addExpansions(UChar32 start, UChar32 end);
114    void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
115
116    /** Prefixes are reversed in the data structure. */
117    void setPrefix(const UnicodeString &pfx) {
118        unreversedPrefix = pfx;
119        unreversedPrefix.reverse();
120    }
121    void resetPrefix() {
122        unreversedPrefix.remove();
123    }
124
125    const CollationData *data;
126    UnicodeSet *contractions;
127    UnicodeSet *expansions;
128    CESink *sink;
129    UBool addPrefixes;
130    int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
131    UnicodeSet tailored;
132    UnicodeSet ranges;
133    UnicodeString unreversedPrefix;
134    const UnicodeString *suffix;
135    int64_t ces[Collation::MAX_EXPANSION_LENGTH];
136    UErrorCode errorCode;
137};
138
139U_NAMESPACE_END
140
141#endif  // !UCONFIG_NO_COLLATION
142#endif  // __COLLATIONSETS_H__
143