/*
*******************************************************************************
*
*   Copyright (C) 1999-2013 International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  rbbidata.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   RBBI data formats  Includes
*
*                          Structs that describe the format of the binary RBBI data,
*                          as it is stored in ICU's data file.
*
*      RBBIDataWrapper  -  Instances of this class sit between the
*                          raw data structs and the RulesBasedBreakIterator objects
*                          that are created by applications.  The wrapper class
*                          provides reference counting for the underlying data,
*                          and direct pointers to data that would not otherwise
*                          be accessible without ugly pointer arithmetic.  The
*                          wrapper does not attempt to provide any higher level
*                          abstractions for the data itself.
*
*                          There will be only one instance of RBBIDataWrapper for any
*                          set of RBBI run time data being shared by instances
*                          (clones) of RulesBasedBreakIterator.
*/

#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__

#include "unicode/utypes.h"
#include "unicode/udata.h"
#include "udataswp.h"

/**
 * Swap RBBI data. See udataswp.h.
 *
 * This prototype is declared outside the __cplusplus section below, so it is
 * visible to both C and C++ translation units.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode);

#ifdef __cplusplus

#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "umutex.h"
#include "utrie.h"

U_NAMESPACE_BEGIN

/*
 *   The following structs map exactly onto the raw data from ICU common data file.
 *   Because of that, the order, type and size of their fields must not be changed.
 */
struct RBBIDataHeader {
    uint32_t         fMagic;           /*  Magic number identifying RBBI data.              */
                                       /*     The original comment read "== 0xbla0", which  */
                                       /*     is not valid hex; presumably 0xb1a0.          */
                                       /*     TODO confirm against the code that loads it.  */
    uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo */
                                       /*   if there is one associated with this data.      */
                                       /*     (version originates in rbbi, is copied to UDataInfo) */
                                       /*   For ICU 3.2 and earlier, this field was         */
                                       /*       uint32_t  fVersion                          */
                                       /*   with a value of 1.                              */
    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,         */
                                       /*      including all sections, not just the header. */
    uint32_t         fCatCount;        /*  Number of character categories.                  */

    /*                                                                        */
    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
    /*  All sizes are in bytes.                                               */
    /*                                                                        */
    uint32_t         fFTable;          /*  Offset to the forward state transition table.    */
    uint32_t         fFTableLen;
    uint32_t         fRTable;          /*  Offset to the reverse state transition table.    */
    uint32_t         fRTableLen;
    uint32_t         fSFTable;         /*  Offset to the safe point forward transition table */
    uint32_t         fSFTableLen;
    uint32_t         fSRTable;         /*  Offset to the safe point reverse transition table */
    uint32_t         fSRTableLen;
    uint32_t         fTrie;            /*  Offset to Trie data for character categories     */
    uint32_t         fTrieLen;
    uint32_t         fRuleSource;      /*  Offset to the source for the break               */
    uint32_t         fRuleSourceLen;   /*    rules.  Stored as UChar *.                     */
    uint32_t         fStatusTable;     /*  Offset to the table of rule status values        */
    uint32_t         fStatusTableLen;

    uint32_t         fReserved[6];     /*  Reserved for expansion                           */

};



/*
 *   One row of a state transition table.  The declared size of fNextState is
 *   only a placeholder; see the comment on that field.
 */
struct  RBBIStateTableRow {
    int16_t          fAccepting;       /*  Non-zero if this row is for an accepting state.   */
                                       /*  Value 0: not an accepting state.                  */
                                       /*       -1: Unconditional Accepting state.           */
                                       /*    positive:  Look-ahead match has completed.      */
                                       /*           Actual boundary position happened earlier */
                                       /*           Value here == fLookAhead in earlier      */
                                       /*              state, at actual boundary pos.        */
    int16_t          fLookAhead;       /*  Non-zero if this row is for a state that          */
                                       /*    corresponds to a '/' in the rule source.        */
                                       /*    Value is the same as the fAccepting             */
                                       /*      value for the rule (which will appear         */
                                       /*      in a different state).                        */
    int16_t          fTagIdx;          /*  Non-zero if this row covers a {tagged} position   */
                                       /*     from a rule.  Value is the index in the        */
                                       /*     StatusTable of the set of matching             */
                                       /*     tags (rule status values)                      */
    int16_t          fReserved;
    uint16_t         fNextState[2];    /*  Next State, indexed by char category.             */
                                       /*  This array does not have two elements             */
                                       /*    Array Size is actually fData->fHeader->fCatCount */
                                       /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
                                       /*              before changing anything here.        */
};


/*
 *   Header of a state transition table, followed in memory by the rows
 *   themselves.  Rows are variable length (fRowLen bytes each), which is why
 *   the row storage is declared as raw chars rather than as an array of
 *   RBBIStateTableRow.
 */
struct RBBIStateTable {
    uint32_t         fNumStates;       /*  Number of states.                                 */
    uint32_t         fRowLen;          /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;           /*  Option Flags for this state table                 */
    uint32_t         fReserved;        /*  reserved                                          */
    char             fTableData[4];    /*  First RBBIStateTableRow begins here.              */
                                       /*    (making it char[] simplifies ugly address       */
                                       /*     arithmetic for indexing variable length rows.) */
};

/*
 *   Flag values for a state table.
 *   NOTE(review): presumably these are the bits stored in
 *   RBBIStateTable::fFlags - confirm against the code that reads fFlags.
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1,
    RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;


/*                                        */
/*   The reference counting wrapper class */
/*                                        */
class RBBIDataWrapper : public UMemory {
public:
    /* Tag type whose only purpose is to select the three-argument            */
    /*   (data, dontAdopt, status) constructor overload below.                */
    /*   NOTE(review): presumably that overload leaves ownership of the data  */
    /*   with the caller (see fDontFreeData) - confirm in the implementation. */
    enum EDontAdopt {
        kDontAdopt
    };

    /* Constructors.  The data can be supplied either as a pointer to an      */
    /*   RBBIDataHeader or as a UDataMemory.  Errors are reported through     */
    /*   the status parameter.  The implementations are not in this header.   */
    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
    ~RBBIDataWrapper();

    void                  init(const RBBIDataHeader *data, UErrorCode &status);

    /* Reference counting for the shared data (see the file header comment).  */
    RBBIDataWrapper      *addReference();
    void                  removeReference();

    UBool                 operator ==(const RBBIDataWrapper &other) const;
    int32_t               hashCode();
    const UnicodeString  &getRuleSourceString() const;

    /* Debug printing.  The member functions exist only when RBBI_DEBUG is    */
    /*   defined.  Otherwise printData and printTable become function-like    */
    /*   macros that expand to nothing.  Being macros, they are not scoped    */
    /*   to this class: they apply to every later use of these names          */
    /*   followed by '(' in any file that includes this header.               */
#ifdef RBBI_DEBUG
    void                  printData();
    void                  printTable(const char *heading, const RBBIStateTable *table);
#else
    #define printData()
    #define printTable(heading, table)
#endif

    /*                                     */
    /*   Pointers to items within the data */
    /*                                     */
    const RBBIDataHeader     *fHeader;
    const RBBIStateTable     *fForwardTable;
    const RBBIStateTable     *fReverseTable;
    const RBBIStateTable     *fSafeFwdTable;
    const RBBIStateTable     *fSafeRevTable;
    const UChar              *fRuleSource;
    const int32_t            *fRuleStatusTable;

    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
    int32_t             fStatusMaxIdx;

    UTrie               fTrie;

private:
    u_atomic_int32_t    fRefCount;      /* Reference count for this wrapper.                  */
    UDataMemory        *fUDataMem;      /* NOTE(review): presumably the UDataMemory given to  */
                                        /*   the UDataMemory constructor - confirm.           */
    UnicodeString       fRuleString;    /* NOTE(review): presumably the string returned by    */
                                        /*   getRuleSourceString() - confirm.                 */
    UBool               fDontFreeData;  /* NOTE(review): presumably set by the kDontAdopt     */
                                        /*   constructor - confirm.                           */

    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
};



U_NAMESPACE_END

#endif /* C++ */

#endif