/*
*******************************************************************************
*
*   Copyright (C) 1999-2013 International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  rbbidata.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   RBBI data formats  Includes
*
*                          Structs that describe the format of the Binary RBBI data,
*                          as it is stored in ICU's data file.
*
*      RBBIDataWrapper  -  Instances of this class sit between the
*                          raw data structs and the RulesBasedBreakIterator objects
*                          that are created by applications.  The wrapper class
*                          provides reference counting for the underlying data,
*                          and direct pointers to data that would not otherwise
*                          be accessible without ugly pointer arithmetic.  The
*                          wrapper does not attempt to provide any higher level
*                          abstractions for the data itself.
*
*                          There will be only one instance of RBBIDataWrapper for any
*                          set of RBBI run time data being shared by instances
*                          (clones) of RulesBasedBreakIterator.
*/
31
32#ifndef __RBBIDATA_H__
33#define __RBBIDATA_H__
34
35#include "unicode/utypes.h"
36#include "unicode/udata.h"
37#include "udataswp.h"
38
39/**
40 * Swap RBBI data. See udataswp.h.
41 * @internal
42 */
43U_CAPI int32_t U_EXPORT2
44ubrk_swap(const UDataSwapper *ds,
45          const void *inData, int32_t length, void *outData,
46          UErrorCode *pErrorCode);
47
48#ifdef __cplusplus
49
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
52#include "umutex.h"
53#include "utrie.h"
54
55U_NAMESPACE_BEGIN
56
/*
 *   The following structs map exactly onto the raw data from the ICU common data file.
 *   Because they mirror a binary format, field order, field widths and array
 *   sizes must not be changed.
 */

/*
 *   RBBIDataHeader - the header found at the start of a block of RBBI data.
 *   It records the total length of the data and, for each subsection, a byte
 *   offset (relative to the start of this header) and a byte length.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;            /*  == 0xb1a0  (hex digit one, not the letter 'l')          */
    uint8_t          fFormatVersion[4]; /*  Data Format.  Same as the value in struct UDataInfo     */
                                        /*   if there is one associated with this data.             */
                                        /*     (version originates in rbbi, is copied to UDataInfo) */
                                        /*   For ICU 3.2 and earlier, this field was                */
                                        /*       uint32_t  fVersion                                 */
                                        /*   with a value of 1.                                     */
    uint32_t         fLength;           /*  Total length in bytes of this RBBI Data,                */
                                        /*      including all sections, not just the header.        */
    uint32_t         fCatCount;         /*  Number of character categories.                         */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;          /*  Offset to the forward state transition table.            */
    uint32_t         fFTableLen;       /*  Size of the forward state transition table.              */
    uint32_t         fRTable;          /*  Offset to the reverse state transition table.            */
    uint32_t         fRTableLen;       /*  Size of the reverse state transition table.              */
    uint32_t         fSFTable;         /*  Offset to the safe point forward transition table.       */
    uint32_t         fSFTableLen;      /*  Size of the safe point forward transition table.         */
    uint32_t         fSRTable;         /*  Offset to the safe point reverse transition table.       */
    uint32_t         fSRTableLen;      /*  Size of the safe point reverse transition table.         */
    uint32_t         fTrie;            /*  Offset to Trie data for character categories.            */
    uint32_t         fTrieLen;         /*  Size of the Trie data.                                   */
    uint32_t         fRuleSource;      /*  Offset to the source for the break                       */
    uint32_t         fRuleSourceLen;   /*    rules (and its size).  Stored UChar *.                 */
    uint32_t         fStatusTable;     /*  Offset to the table of rule status values.               */
    uint32_t         fStatusTableLen;  /*  Size of the table of rule status values.                 */

    uint32_t         fReserved[6];     /*  Reserved for expansion.                                  */

};
95
96
97
/*
 *   RBBIStateTableRow - one row (one state) of a state transition table.
 *
 *   Rows are variable length: fNextState is declared with two elements only
 *   as a placeholder, so sizeof(RBBIStateTableRow) is NOT the size of a row
 *   in the data.  The row length in bytes is recorded in
 *   RBBIStateTable::fRowLen.
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened earlier */
                                    /*           Value here == fLookAhead in earlier      */
                                    /*              state, at actual boundary pos.        */
    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state).                        */
    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values)                      */
    int16_t          fReserved;
    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
                                    /*  This array does not have two elements             */
                                    /*    Array Size is actually                          */
                                    /*       fData->fHeader->fCatCount                    */
                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                    /*              before changing anything here.        */
};
122
123
/*
 *   RBBIStateTable - header of one state transition table, immediately
 *   followed by the table's rows.
 *
 *   fTableData is a placeholder marking where the first row starts; its
 *   declared size of 4 is not the size of the row data.
 *   NOTE(review): presumably row i begins at fTableData + i * fRowLen -
 *   confirm against the code that walks the table.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table                 */
    uint32_t         fReserved;     /*  reserved                                          */
    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
                                    /*    (making it char[] simplifies ugly address       */
                                    /*     arithmetic for indexing variable length rows.) */
};
133
/*
 *   Option flags for a state table.  The values are distinct single bits, so
 *   they can be combined with bitwise OR.
 *   NOTE(review): presumably these are the values stored in
 *   RBBIStateTable::fFlags - confirm against the table builder.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1,
    RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;
138
139
140/*                                        */
141/*   The reference counting wrapper class */
142/*                                        */
143class RBBIDataWrapper : public UMemory {
144public:
145    enum EDontAdopt {
146        kDontAdopt
147    };
148    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
149    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
150    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
151    ~RBBIDataWrapper();
152
153    void                  init(const RBBIDataHeader *data, UErrorCode &status);
154    RBBIDataWrapper      *addReference();
155    void                  removeReference();
156    UBool                 operator ==(const RBBIDataWrapper &other) const;
157    int32_t               hashCode();
158    const UnicodeString  &getRuleSourceString() const;
159#ifdef RBBI_DEBUG
160    void                  printData();
161    void                  printTable(const char *heading, const RBBIStateTable *table);
162#else
163    #define printData()
164    #define printTable(heading, table)
165#endif
166
167    /*                                     */
168    /*   Pointers to items within the data */
169    /*                                     */
170    const RBBIDataHeader     *fHeader;
171    const RBBIStateTable     *fForwardTable;
172    const RBBIStateTable     *fReverseTable;
173    const RBBIStateTable     *fSafeFwdTable;
174    const RBBIStateTable     *fSafeRevTable;
175    const UChar              *fRuleSource;
176    const int32_t            *fRuleStatusTable;
177
178    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
179    int32_t             fStatusMaxIdx;
180
181    UTrie               fTrie;
182
183private:
184    u_atomic_int32_t    fRefCount;
185    UDataMemory        *fUDataMem;
186    UnicodeString       fRuleString;
187    UBool               fDontFreeData;
188
189    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
190    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
191};
192
193
194
195U_NAMESPACE_END
196
197#endif /* C++ */
198
199#endif
200