1/*
2*******************************************************************************
3* Copyright (C) 2012-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* collationbasedatabuilder.h
7*
8* created on: 2012aug11
9* created by: Markus W. Scherer
10*/
11
12#ifndef __COLLATIONBASEDATABUILDER_H__
13#define __COLLATIONBASEDATABUILDER_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/uniset.h"
20#include "unicode/unistr.h"
21#include "collation.h"
22#include "collationdata.h"
23#include "collationdatabuilder.h"
24#include "normalizer2impl.h"
25#include "utrie2.h"
26#include "uvectr32.h"
27#include "uvectr64.h"
28#include "uvector.h"
29
30U_NAMESPACE_BEGIN
31
32/**
33 * Low-level base CollationData builder.
34 */
35class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder {
36public:
37    CollationBaseDataBuilder(UErrorCode &errorCode);
38
39    virtual ~CollationBaseDataBuilder();
40
41    void init(UErrorCode &errorCode);
42
43    /**
44     * Sets the Han ranges as ranges of offset CE32s.
45     * Note: Unihan extension A sorts after the other BMP ranges.
46     * See http://www.unicode.org/reports/tr10/#Implicit_Weights
47     *
48     * @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
49     *               as (start, end) code point pairs
50     * @param length number of code points (not pairs)
51     * @param errorCode in/out error code
52     */
53    void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode);
54
55    void setNumericPrimary(uint32_t np) { numericPrimary = np; }
56
57    virtual UBool isCompressibleLeadByte(uint32_t b) const;
58
59    void setCompressibleLeadByte(uint32_t b);
60
61    static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
62    static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
63
64    virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
65
66    void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
67    void addRootElement(int64_t ce, UErrorCode &errorCode);
68
69    void addReorderingGroup(uint32_t firstByte, uint32_t lastByte,
70                            const UnicodeString &groupScripts,
71                            UErrorCode &errorCode);
72
73    virtual void build(CollationData &data, UErrorCode &errorCode);
74
75    void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode);
76
77private:
78    int32_t writeRootElementsRange(
79            uint32_t prevPrimary, uint32_t p, int32_t i,
80            UVector32 &table, UErrorCode &errorCode);
81
82    // Flags for which primary-weight lead bytes are compressible.
83    UBool compressibleBytes[256];
84    uint32_t numericPrimary;
85    uint32_t firstHanPrimary;
86    uint32_t lastHanPrimary;
87    int32_t hanStep;
88    UVector64 rootElements;
89    UnicodeString scripts;
90};
91
92U_NAMESPACE_END
93
94#endif  // !UCONFIG_NO_COLLATION
95#endif  // __COLLATIONBASEDATABUILDER_H__
96