collationbasedatabuilder.h revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/* 2******************************************************************************* 3* Copyright (C) 2012-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* collationbasedatabuilder.h 7* 8* created on: 2012aug11 9* created by: Markus W. Scherer 10*/ 11 12#ifndef __COLLATIONBASEDATABUILDER_H__ 13#define __COLLATIONBASEDATABUILDER_H__ 14 15#include "unicode/utypes.h" 16 17#if !UCONFIG_NO_COLLATION 18 19#include "unicode/uniset.h" 20#include "unicode/unistr.h" 21#include "collation.h" 22#include "collationdata.h" 23#include "collationdatabuilder.h" 24#include "normalizer2impl.h" 25#include "utrie2.h" 26#include "uvectr32.h" 27#include "uvectr64.h" 28#include "uvector.h" 29 30U_NAMESPACE_BEGIN 31 32/** 33 * Low-level base CollationData builder. 34 */ 35class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { 36public: 37 CollationBaseDataBuilder(UErrorCode &errorCode); 38 39 virtual ~CollationBaseDataBuilder(); 40 41 void init(UErrorCode &errorCode); 42 43 /** 44 * Sets the Han ranges as ranges of offset CE32s. 45 * Note: Unihan extension A sorts after the other BMP ranges. 46 * See http://www.unicode.org/reports/tr10/#Implicit_Weights 47 * 48 * @param ranges array of ranges of [:Unified_Ideograph:] in collation order, 49 * as (start, end) code point pairs 50 * @param length number of code points (not pairs) 51 * @param errorCode in/out error code 52 */ 53 void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode); 54 55 void setNumericPrimary(uint32_t np) { numericPrimary = np; } 56 57 virtual UBool isCompressibleLeadByte(uint32_t b) const; 58 59 void setCompressibleLeadByte(uint32_t b); 60 61 static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 62 static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 63 64 virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 65 66 void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 67 void addRootElement(int64_t ce, UErrorCode &errorCode); 68 69 void addReorderingGroup(uint32_t firstByte, uint32_t lastByte, 70 const UnicodeString &groupScripts, 71 UErrorCode &errorCode); 72 73 virtual void build(CollationData &data, UErrorCode &errorCode); 74 75 void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); 76 77private: 78 int32_t writeRootElementsRange( 79 uint32_t prevPrimary, uint32_t p, int32_t i, 80 UVector32 &table, UErrorCode &errorCode); 81 82 // Flags for which primary-weight lead bytes are compressible. 83 UBool compressibleBytes[256]; 84 uint32_t numericPrimary; 85 uint32_t firstHanPrimary; 86 uint32_t lastHanPrimary; 87 int32_t hanStep; 88 UVector64 rootElements; 89 UnicodeString scripts; 90}; 91 92U_NAMESPACE_END 93 94#endif // !UCONFIG_NO_COLLATION 95#endif // __COLLATIONBASEDATABUILDER_H__ 96