1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011 International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: rbbidata.h 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* RBBI data formats Includes 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Structs that describes the format of the Binary RBBI data, 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* as it is stored in ICU's data file. 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* RBBIDataWrapper - Instances of this class sit between the 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* raw data structs and the RulesBasedBreakIterator objects 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* that are created by applications. The wrapper class 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* provides reference counting for the underlying data, 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and direct pointers to data that would not otherwise 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* be accessible without ugly pointer arithmetic. The 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* wrapper does not attempt to provide any higher level 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* abstractions for the data itself. 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* There will be only one instance of RBBIDataWrapper for any 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* set of RBBI run time data being shared by instances 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (clones) of RulesBasedBreakIterator. 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __RBBIDATA_H__ 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __RBBIDATA_H__ 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/udata.h" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "udataswp.h" 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Swap RBBI data. See udataswp.h. 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruubrk_swap(const UDataSwapper *ds, 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const void *inData, int32_t length, void *outData, 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#ifdef __cplusplus 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "utrie.h" 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The following structs map exactly onto the raw data from ICU common data file. 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIDataHeader { 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fMagic; /* == 0xbla0 */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if there is one associated with this data. */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* (version originates in rbbi, is copied to UDataInfo) */ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* For ICU 3.2 and earlier, this field was */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uint32_t fVersion */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* with a value of 1. */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fLength; /* Total length in bytes of this RBBI Data, */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* including all sections, not just the header. */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fCatCount; /* Number of character categories. */ 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Offsets and sizes of each of the subsections within the RBBI data. */ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* All offsets are bytes from the start of the RBBIDataHeader. */ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* All sizes are in bytes. */ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fFTable; /* forward state transition table. */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fFTableLen; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fRTable; /* Offset to the reverse state transition table. */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fRTableLen; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fSFTable; /* safe point forward transition table */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fSFTableLen; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fSRTable; /* safe point reverse transition table */ 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fSRTableLen; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fTrie; /* Offset to Trie data for character categories */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fTrieLen; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fRuleSource; /* Offset to the source for for the break */ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fStatusTable; /* Offset to the table of rule status values */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fStatusTableLen; 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fReserved[6]; /* Reserved for expansion */ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIStateTableRow { 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Value 0: not an accepting state. */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* -1: Unconditional Accepting state. */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* positive: Look-ahead match has completed. */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Actual boundary position happened earlier */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Value here == fLookAhead in earlier */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state, at actual boundary pos. */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t fLookAhead; /* Non-zero if this row is for a state that */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* corresponds to a '/' in the rule source. */ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Value is the same as the fAccepting */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* value for the rule (which will appear */ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in a different state. */ 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from a rule. Value is the index in the */ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* StatusTable of the set of matching */ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* tags (rule status values) */ 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t fReserved; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t fNextState[2]; /* Next State, indexed by char category. */ 11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /* This array does not have two elements */ 11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /* Array Size is actually fData->fHeader->fCatCount */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CAUTION: see RBBITableBuilder::getTableSize() */ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* before changing anything here. */ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIStateTable { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fNumStates; /* Number of states. */ 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fRowLen; /* Length of a state table row, in bytes. */ 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fFlags; /* Option Flags for this state table */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fReserved; /* reserved */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char fTableData[4]; /* First RBBIStateTableRow begins here. */ 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* (making it char[] simplifies ugly address */ 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* arithmetic for indexing variable length rows.) */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum { 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBI_LOOKAHEAD_HARD_BREAK = 1, 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBI_BOF_REQUIRED = 2 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} RBBIStateTableFlags; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* */ 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The reference counting wrapper class */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* */ 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RBBIDataWrapper : public UMemory { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho enum EDontAdopt { 14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho kDontAdopt 14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); 14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~RBBIDataWrapper(); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void init(const RBBIDataHeader *data, UErrorCode &status); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper *addReference(); 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void removeReference(); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool operator ==(const RBBIDataWrapper &other) const; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t hashCode(); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString &getRuleSourceString() const; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void printData(); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void printTable(const char *heading, const RBBIStateTable *table); 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #define printData() 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #define printTable(heading, table) 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* */ 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Pointers to items within the data */ 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* */ 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIDataHeader *fHeader; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIStateTable *fForwardTable; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIStateTable *fReverseTable; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIStateTable *fSafeFwdTable; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIStateTable *fSafeRevTable; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *fRuleSource; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *fRuleStatusTable; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* number of int32_t values in the rule status table. Used to sanity check indexing */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fStatusMaxIdx; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTrie fTrie; 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fRefCount; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UDataMemory *fUDataMem; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString fRuleString; 18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool fDontFreeData; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* C++ */ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 199