//
//  rbbisetb.h
/*
**********************************************************************
*   Copyright (c) 2001-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBBISETB_H
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBBISETB_H
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirb.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct  UNewTrie;
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
//
//  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
//                   from the Unicode Sets appearing in the source RBBI rules, and
//                   creates the TRIE table used to map from Unicode to the
//                   character categories.
//
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  RangeDescriptor
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//     Each of the non-overlapping character ranges gets one of these descriptors.
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//     All of them are strung together in a linked list, which is kept in order
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//     (by character)
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RangeDescriptor : public UMemory {
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32            fStartChar;      // Start of range, unicode 32 bit value.
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32            fEndChar;        // End of range, unicode 32 bit value.
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t            fNum;            // runtime-mapped input value for this range.
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector           *fIncludesSets;   // vector of the the original
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   Unicode sets that include this range.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //    (Contains ptrs to uset nodes)
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(UErrorCode &status);
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~RangeDescriptor();
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void split(UChar32 where, UErrorCode &status);   // Spit this range in two at "where", with
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   where appearing in the second (higher) part.
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void setDictionaryFlag();           // Check whether this range appears as part of
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   the Unicode set named "dictionary"
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//      Starting with the rules parse tree from the scanner,
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                   -  Enumerate the set of UnicodeSets that are referenced
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                      by the RBBI rules.
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                   -  compute a derived set of non-overlapping UnicodeSets
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                      that will correspond to columns in the state table for
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                      the RBBI execution engine.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                   -  construct the trie table that maps input characters
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                      to set numbers in the non-overlapping set of sets.
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RBBISetBuilder : public UMemory {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder(RBBIRuleBuilder *rb);
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~RBBISetBuilder();
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     build();
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     addValToSets(UVector *sets,      uint32_t val);
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     addValToSet (RBBINode *usetNode, uint32_t val);
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //    runtime state machine, which are the same as
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //    columns in the DFA state table
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     serializeTrie(uint8_t *where);  // write out the serialized Trie.
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32  getFirstChar(int32_t  val) const;
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool    sawBOF() const;                 // Indicate whether any references to the {bof} pseudo
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //   character were encountered.
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printSets();
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printRanges();
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printRangeGroups();
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printSets()
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printRanges()
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printRangeGroups()
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void           numberSets();
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode            *fStatus;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t              fTrieSize;        //  the Unicode Sets.
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Groups correspond to character categories -
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       groups of ranges that are in the same original UnicodeSets.
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       fGroupCount is the index of the last used group.
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       State table column 0 is not used.  Column 1 is for end-of-input.
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       column 2 is for group 0.  Funny counting.
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t               fGroupCount;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool                 fSawBOF;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
131