// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
//  rbbisetb.h
/*
**********************************************************************
*   Copyright (c) 2001-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBBISETB_H
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBBISETB_H
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirb.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct  UNewTrie;
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
//
//  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
//                   from the Unicode Sets appearing in the source RBBI rules, and
//                   creates the TRIE table used to map from Unicode to the
//                   character categories.
//


//
//  RangeDescriptor
//
//     Each of the non-overlapping character ranges gets one of these descriptors.
//     All of them are strung together in a linked list, which is kept in order
//     (by character).
//
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RangeDescriptor : public UMemory {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32            fStartChar;      // Start of range, unicode 32 bit value.
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32            fEndChar;        // End of range, unicode 32 bit value.
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t            fNum;            // runtime-mapped input value for this range.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector           *fIncludesSets;   // vector of the the original
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   Unicode sets that include this range.
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //    (Contains ptrs to uset nodes)
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(UErrorCode &status);
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~RangeDescriptor();
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void split(UChar32 where, UErrorCode &status);   // Spit this range in two at "where", with
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   where appearing in the second (higher) part.
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void setDictionaryFlag();           // Check whether this range appears as part of
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                        //   the Unicode set named "dictionary"
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};


//
//  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
//
//      Starting with the rules parse tree from the scanner,
//
//                   -  Enumerate the set of UnicodeSets that are referenced
//                      by the RBBI rules.
//                   -  Compute a derived set of non-overlapping UnicodeSets
//                      that will correspond to columns in the state table for
//                      the RBBI execution engine.
//                   -  Construct the trie table that maps input characters
//                      to set numbers in the non-overlapping set of sets.
//


78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RBBISetBuilder : public UMemory {
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder(RBBIRuleBuilder *rb);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~RBBISetBuilder();
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     build();
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     addValToSets(UVector *sets,      uint32_t val);
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     addValToSet (RBBINode *usetNode, uint32_t val);
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //    runtime state machine, which are the same as
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //    columns in the DFA state table
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     serializeTrie(uint8_t *where);  // write out the serialized Trie.
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32  getFirstChar(int32_t  val) const;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool    sawBOF() const;                 // Indicate whether any references to the {bof} pseudo
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             //   character were encountered.
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printSets();
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printRanges();
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void     printRangeGroups();
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printSets()
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printRanges()
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    #define printRangeGroups()
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void           numberSets();
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode            *fStatus;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t              fTrieSize;        //  the Unicode Sets.
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Groups correspond to character categories -
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       groups of ranges that are in the same original UnicodeSets.
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       fGroupCount is the index of the last used group.
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       State table column 0 is not used.  Column 1 is for end-of-input.
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //       column 2 is for group 0.  Funny counting.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t               fGroupCount;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool                 fSawBOF;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
133