1/* 2******************************************************************************* 3* Copyright (C) 2013-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* collationsets.h 7* 8* created on: 2013feb09 9* created by: Markus W. Scherer 10*/ 11 12#ifndef __COLLATIONSETS_H__ 13#define __COLLATIONSETS_H__ 14 15#include "unicode/utypes.h" 16 17#if !UCONFIG_NO_COLLATION 18 19#include "unicode/uniset.h" 20#include "collation.h" 21 22U_NAMESPACE_BEGIN 23 24struct CollationData; 25 26/** 27 * Finds the set of characters and strings that sort differently in the tailoring 28 * from the base data. 29 * 30 * Every mapping in the tailoring needs to be compared to the base, 31 * because some mappings are copied for optimization, and 32 * all contractions for a character are copied if any contractions for that character 33 * are added, modified or removed. 34 * 35 * It might be simpler to re-parse the rule string, but: 36 * - That would require duplicating some of the from-rules builder code. 37 * - That would make the runtime code depend on the builder. 38 * - That would only work if we have the rule string, and we allow users to 39 * omit the rule string from data files. 40 */ 41class TailoredSet : public UMemory { 42public: 43 TailoredSet(UnicodeSet *t) 44 : data(NULL), baseData(NULL), 45 tailored(t), 46 suffix(NULL), 47 errorCode(U_ZERO_ERROR) {} 48 49 void forData(const CollationData *d, UErrorCode &errorCode); 50 51 /** 52 * @return U_SUCCESS(errorCode) in C++, void in Java 53 * @internal only public for access by callback 54 */ 55 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 56 57private: 58 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); 59 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); 60 void compareContractions(UChar32 c, const UChar *p, const UChar *q); 61 62 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); 63 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); 64 void addContractions(UChar32 c, const UChar *p); 65 void addSuffix(UChar32 c, const UnicodeString &sfx); 66 void add(UChar32 c); 67 68 /** Prefixes are reversed in the data structure. */ 69 void setPrefix(const UnicodeString &pfx) { 70 unreversedPrefix = pfx; 71 unreversedPrefix.reverse(); 72 } 73 void resetPrefix() { 74 unreversedPrefix.remove(); 75 } 76 77 const CollationData *data; 78 const CollationData *baseData; 79 UnicodeSet *tailored; 80 UnicodeString unreversedPrefix; 81 const UnicodeString *suffix; 82 UErrorCode errorCode; 83}; 84 85class ContractionsAndExpansions : public UMemory { 86public: 87 class CESink : public UMemory { 88 public: 89 virtual ~CESink(); 90 virtual void handleCE(int64_t ce) = 0; 91 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; 92 }; 93 94 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) 95 : data(NULL), 96 contractions(con), expansions(exp), 97 sink(s), 98 addPrefixes(prefixes), 99 checkTailored(0), 100 suffix(NULL), 101 errorCode(U_ZERO_ERROR) {} 102 103 void forData(const CollationData *d, UErrorCode &errorCode); 104 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); 105 106 // all following: @internal, only public for access by callback 107 108 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 109 110 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); 111 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); 112 113 void addExpansions(UChar32 start, UChar32 end); 114 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); 115 116 /** Prefixes are reversed in the data structure. */ 117 void setPrefix(const UnicodeString &pfx) { 118 unreversedPrefix = pfx; 119 unreversedPrefix.reverse(); 120 } 121 void resetPrefix() { 122 unreversedPrefix.remove(); 123 } 124 125 const CollationData *data; 126 UnicodeSet *contractions; 127 UnicodeSet *expansions; 128 CESink *sink; 129 UBool addPrefixes; 130 int8_t checkTailored; // -1: collected tailored +1: exclude tailored 131 UnicodeSet tailored; 132 UnicodeSet ranges; 133 UnicodeString unreversedPrefix; 134 const UnicodeString *suffix; 135 int64_t ces[Collation::MAX_EXPANSION_LENGTH]; 136 UErrorCode errorCode; 137}; 138 139U_NAMESPACE_END 140 141#endif // !UCONFIG_NO_COLLATION 142#endif // __COLLATIONSETS_H__ 143