//
//  rbbisetb.h
/*
**********************************************************************
*   Copyright (c) 2001-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

10#ifndef RBBISETB_H
11#define RBBISETB_H
12
13#include "unicode/utypes.h"
14#include "unicode/uobject.h"
15#include "rbbirb.h"
16#include "uvector.h"
17
18struct  UNewTrie;
19
20U_NAMESPACE_BEGIN
21
//
//  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
//                   from the Unicode Sets appearing in the source RBBI rules, and
//                   creates the TRIE table used to map from Unicode to the
//                   character categories.
//


//
//  RangeDescriptor
//
//     Each of the non-overlapping character ranges gets one of these descriptors.
//     All of them are strung together in a linked list, which is kept in order
//     (by character)
//
37class RangeDescriptor : public UMemory {
38public:
39    UChar32            fStartChar;      // Start of range, unicode 32 bit value.
40    UChar32            fEndChar;        // End of range, unicode 32 bit value.
41    int32_t            fNum;            // runtime-mapped input value for this range.
42    UVector           *fIncludesSets;   // vector of the the original
43                                        //   Unicode sets that include this range.
44                                        //    (Contains ptrs to uset nodes)
45    RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
46
47    RangeDescriptor(UErrorCode &status);
48    RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
49    ~RangeDescriptor();
50    void split(UChar32 where, UErrorCode &status);   // Spit this range in two at "where", with
51                                        //   where appearing in the second (higher) part.
52    void setDictionaryFlag();           // Check whether this range appears as part of
53                                        //   the Unicode set named "dictionary"
54
55private:
56    RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
57    RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
58};
59
60
//
//  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
//
//      Starting with the rules parse tree from the scanner,
//
//                   -  Enumerate the set of UnicodeSets that are referenced
//                      by the RBBI rules.
//                   -  compute a derived set of non-overlapping UnicodeSets
//                      that will correspond to columns in the state table for
//                      the RBBI execution engine.
//                   -  construct the trie table that maps input characters
//                      to set numbers in the non-overlapping set of sets.
//


76class RBBISetBuilder : public UMemory {
77public:
78    RBBISetBuilder(RBBIRuleBuilder *rb);
79    ~RBBISetBuilder();
80
81    void     build();
82    void     addValToSets(UVector *sets,      uint32_t val);
83    void     addValToSet (RBBINode *usetNode, uint32_t val);
84    int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
85                                             //    runtime state machine, which are the same as
86                                             //    columns in the DFA state table
87    int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
88    void     serializeTrie(uint8_t *where);  // write out the serialized Trie.
89    UChar32  getFirstChar(int32_t  val) const;
90    UBool    sawBOF() const;                 // Indicate whether any references to the {bof} pseudo
91                                             //   character were encountered.
92#ifdef RBBI_DEBUG
93    void     printSets();
94    void     printRanges();
95    void     printRangeGroups();
96#else
97    #define printSets()
98    #define printRanges()
99    #define printRangeGroups()
100#endif
101
102private:
103    void           numberSets();
104
105    RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
106    UErrorCode            *fStatus;
107
108    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
109
110    UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
111    uint32_t              fTrieSize;        //  the Unicode Sets.
112
113    // Groups correspond to character categories -
114    //       groups of ranges that are in the same original UnicodeSets.
115    //       fGroupCount is the index of the last used group.
116    //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
117    //       State table column 0 is not used.  Column 1 is for end-of-input.
118    //       column 2 is for group 0.  Funny counting.
119    int32_t               fGroupCount;
120
121    UBool                 fSawBOF;
122
123    RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
124    RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
125};
126
127
128
129U_NAMESPACE_END
130#endif
131