/*
*******************************************************************************
*
*   Copyright (C) 1999-2005,2008 International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  rbbidata.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   RBBI data formats.  Includes:
*
*                          Structs that describe the format of the binary RBBI data,
*                          as it is stored in ICU's data file.
*
*      RBBIDataWrapper  -  Instances of this class sit between the
*                          raw data structs and the RulesBasedBreakIterator objects
*                          that are created by applications.  The wrapper class
*                          provides reference counting for the underlying data,
*                          and direct pointers to data that would not otherwise
*                          be accessible without ugly pointer arithmetic.  The
*                          wrapper does not attempt to provide any higher level
*                          abstractions for the data itself.
*
*                          There will be only one instance of RBBIDataWrapper for any
*                          set of RBBI run time data being shared by instances
*                          (clones) of RulesBasedBreakIterator.
*/
31
32#ifndef __RBBIDATA_H__
33#define __RBBIDATA_H__
34
35#include "unicode/utypes.h"
36#include "unicode/udata.h"
37#include "udataswp.h"
38
39/**
40 * Swap RBBI data. See udataswp.h.
41 * @internal
42 */
43U_CAPI int32_t U_EXPORT2
44ubrk_swap(const UDataSwapper *ds,
45          const void *inData, int32_t length, void *outData,
46          UErrorCode *pErrorCode);
47
48#ifdef XP_CPLUSPLUS
49
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
52#include "utrie.h"
53
54U_NAMESPACE_BEGIN
55
/*
 *   The following structs map exactly onto the raw data from the ICU common data file.
 */
/*
 *   Header found at the start of a block of binary RBBI data.
 *
 *   This struct is overlaid directly on the stored data, so the order and
 *   width of every field is part of the data format and must not change.
 *   It consists of four leading 32-bit words, seven (offset, length) pairs
 *   locating the sub-sections of the data, and six reserved words.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;            /*  == 0xb1a0                                               */
    uint8_t          fFormatVersion[4]; /*  Data Format.  Same as the value in struct UDataInfo     */
                                        /*    if there is one associated with this data.            */
                                        /*      (version originates in rbbi, is copied to UDataInfo) */
                                        /*    For ICU 3.2 and earlier, this field was               */
                                        /*        uint32_t  fVersion                                */
                                        /*    with a value of 1.                                    */
    uint32_t         fLength;           /*  Total length in bytes of this RBBI Data,                */
                                        /*      including all sections, not just the header.        */
    uint32_t         fCatCount;         /*  Number of character categories.                         */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;           /*  Offset to the forward state transition table.            */
    uint32_t         fFTableLen;
    uint32_t         fRTable;           /*  Offset to the reverse state transition table.            */
    uint32_t         fRTableLen;
    uint32_t         fSFTable;          /*  Offset to the safe point forward transition table.       */
    uint32_t         fSFTableLen;
    uint32_t         fSRTable;          /*  Offset to the safe point reverse transition table.       */
    uint32_t         fSRTableLen;
    uint32_t         fTrie;             /*  Offset to Trie data for character categories.            */
    uint32_t         fTrieLen;
    uint32_t         fRuleSource;       /*  Offset to the source for the break rules.                */
    uint32_t         fRuleSourceLen;    /*    Stored UChar *.                                        */
    uint32_t         fStatusTable;      /*  Offset to the table of rule status values.               */
    uint32_t         fStatusTableLen;

    uint32_t         fReserved[6];      /*  Reserved for expansion.                                  */

};
94
95
96
/*
 *   One row of a state transition table: four 16-bit per-state attributes
 *   followed by the next-state entries, one per character category.
 *
 *   The struct is overlaid on the stored data; field order and widths are
 *   part of the data format.  fNextState is declared with two elements only
 *   as a placeholder -- real rows are variable length.
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened earlier. */
                                    /*           Value here == fLookAhead in earlier      */
                                    /*              state, at actual boundary pos.        */
    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state).                        */
    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values).                     */
    int16_t          fReserved;
    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
                                    /*    Array Size is fNumCols from the                 */
                                    /*    state table header.                             */
                                    /*    NOTE(review): RBBIStateTable as declared in     */
                                    /*    this file has no fNumCols field; it records the */
                                    /*    row length in bytes (fRowLen).  Confirm which   */
                                    /*    value actually governs the array size.          */
                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                    /*              before changing anything here.        */
};
121
122
/*
 *   Header of one state transition table, immediately followed by the rows.
 *
 *   The struct is overlaid on the stored data; field order and widths are
 *   part of the data format.  Rows are variable length, so they are reached
 *   by byte arithmetic from fTableData using fRowLen rather than by
 *   indexing an array of RBBIStateTableRow.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table.                */
    uint32_t         fReserved;     /*  Reserved.                                         */
    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
                                    /*    (making it char[] simplifies ugly address       */
                                    /*     arithmetic for indexing variable length rows.) */
                                    /*    The declared size of 4 is only a placeholder.   */
};
132
/*
 *   Named constants for state table options.
 *   NOTE(review): the values are distinct single bits (1 and 2), so these
 *   are presumably OR-able flags stored in RBBIStateTable::fFlags -- confirm
 *   against the code that reads fFlags.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1,
    RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;
137
138
139/*                                        */
140/*   The reference counting wrapper class */
141/*                                        */
142class RBBIDataWrapper : public UMemory {
143public:
144    enum EDontAdopt {
145        kDontAdopt
146    };
147    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
148    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
149    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
150    ~RBBIDataWrapper();
151
152    void                  init(const RBBIDataHeader *data, UErrorCode &status);
153    RBBIDataWrapper      *addReference();
154    void                  removeReference();
155    UBool                 operator ==(const RBBIDataWrapper &other) const;
156    int32_t               hashCode();
157    const UnicodeString  &getRuleSourceString() const;
158#ifdef RBBI_DEBUG
159    void                  printData();
160    void                  printTable(const char *heading, const RBBIStateTable *table);
161#else
162    #define printData()
163    #define printTable(heading, table)
164#endif
165
166    /*                                     */
167    /*   Pointers to items within the data */
168    /*                                     */
169    const RBBIDataHeader     *fHeader;
170    const RBBIStateTable     *fForwardTable;
171    const RBBIStateTable     *fReverseTable;
172    const RBBIStateTable     *fSafeFwdTable;
173    const RBBIStateTable     *fSafeRevTable;
174    const UChar              *fRuleSource;
175    const int32_t            *fRuleStatusTable;
176
177    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
178    int32_t             fStatusMaxIdx;
179
180    UTrie               fTrie;
181
182private:
183    int32_t             fRefCount;
184    UDataMemory        *fUDataMem;
185    UnicodeString       fRuleString;
186    UBool               fDontFreeData;
187
188    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
189    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
190};
191
192
193
194U_NAMESPACE_END
195
196#endif /* C++ */
197
198#endif
199