164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 5f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Copyright (C) 1999-2014 International Business Machines Corporation * 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All rights reserved. * 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbidata.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirb.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "utrie.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "udatamem.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uassert.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------- 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Trie access folding function. Copied as-is from properties code in uchar.c 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------- 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFoldingOffset(uint32_t data) { 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(data&0x8000) { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int32_t)(data&0x7fff); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constructors. 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) { 51f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius init0(); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(data, status); 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoRBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) { 56f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius init0(); 5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho init(data, status); 5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fDontFreeData = TRUE; 5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { 62f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius init0(); 63f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 64f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 65f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 66f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const DataHeader *dh = udm->pHeader; 67f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t headerSize = dh->dataHeader.headerSize; 68f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if ( !(headerSize >= 20 && 69f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.isBigEndian == U_IS_BIG_ENDIAN && 70f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.charsetFamily == U_CHARSET_FAMILY && 71f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " 72f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.dataFormat[1] == 0x72 && 73f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.dataFormat[2] == 0x6b && 74f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius dh->info.dataFormat[3] == 0x20) 75f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is 76f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // validated when checking that. 77f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 78f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius status = U_INVALID_FORMAT_ERROR; 79f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 80f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 81f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const char *dataAsBytes = reinterpret_cast<const char *>(dh); 82f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const RBBIDataHeader *rbbidh = reinterpret_cast<const RBBIDataHeader *>(dataAsBytes + headerSize); 83f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius init(rbbidh, status); 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fUDataMem = udm; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// init(). Does most of the work of construction, shared between the 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// constructors. 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 93f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusvoid RBBIDataWrapper::init0() { 94f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fHeader = NULL; 95f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fForwardTable = NULL; 96f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fReverseTable = NULL; 97f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fSafeFwdTable = NULL; 98f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fSafeRevTable = NULL; 99f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fRuleSource = NULL; 100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fRuleStatusTable = NULL; 101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fUDataMem = NULL; 102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fRefCount = 0; 103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fDontFreeData = TRUE; 104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHeader = data; 11127f654740f2a26ad62a5c155af9199af9e69b889claireho if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_INVALID_FORMAT_ERROR; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 11627f654740f2a26ad62a5c155af9199af9e69b889claireho // Note: in ICU version 3.2 and earlier, there was a formatVersion 1 11727f654740f2a26ad62a5c155af9199af9e69b889claireho // that is no longer supported. At that time fFormatVersion was 11827f654740f2a26ad62a5c155af9199af9e69b889claireho // an int32_t field, rather than an array of 4 bytes. 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fDontFreeData = FALSE; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (data->fFTableLen != 0) { 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable); 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (data->fRTableLen != 0) { 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable); 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (data->fSFTableLen != 0) { 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable); 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (data->fSRTableLen != 0) { 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable); 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utrie_unserialize(&fTrie, 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint8_t *)data + fHeader->fTrie, 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHeader->fTrieLen, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fTrie.getFoldingOffset=getFoldingOffset; 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRuleString.setTo(TRUE, fRuleSource, -1); 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_ASSERT(data->fRuleSourceLen > 0); 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable); 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRefCount = 1; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *debugEnv = getenv("U_RBBIDEBUG"); 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();} 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Destructor. Don't call this - use removeReference() instead. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::~RBBIDataWrapper() { 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_ASSERT(fRefCount == 0); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fUDataMem) { 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_close(fUDataMem); 17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!fDontFreeData) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free((void *)fHeader); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Operator == Consider two RBBIDataWrappers to be equal if they 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// refer to the same underlying data. Although 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the data wrappers are normally shared between 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// iterator instances, it's possible to independently 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// open the same data twice, and get two instances, which 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// should still be ==. 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const { 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fHeader == other.fHeader) { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fHeader->fLength != other.fHeader->fLength) { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) { 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t RBBIDataWrapper::hashCode() { 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fHeader->fFTableLen; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Reference Counting. A single RBBIDataWrapper object is shared among 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// however many RulesBasedBreakIterator instances are 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// referencing the same data. 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::removeReference() { 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (umtx_atomic_dec(&fRefCount) == 0) { 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete this; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper *RBBIDataWrapper::addReference() { 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru umtx_atomic_inc(&fRefCount); 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return this; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// getRuleSourceString 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UnicodeString &RBBIDataWrapper::getRuleSourceString() const { 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fRuleString; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// print - debugging function to dump the runtime data tables. 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t s; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf(" %s\n", heading); 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("State | Acc LA TagIx"); 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);} 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) { 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("----"); 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\n"); 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (table == NULL) { 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf(" N U L L T A B L E\n\n"); 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (s=0; s<table->fNumStates; s++) { 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIStateTableRow *row = (RBBIStateTableRow *) 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (table->fTableData + (table->fRowLen * s)); 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (c=0; c<fHeader->fCatCount; c++) { 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("%3d ", row->fNextState[c]); 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\n"); 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\n"); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::printData() { 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength); 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount); 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTable("Forward State Transition Table", fForwardTable); 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTable("Reverse State Transition Table", fReverseTable); 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTable("Safe Forward State Transition Table", fSafeFwdTable); 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTable("Safe Reverse State Transition Table", fSafeRevTable); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\nOrignal Rules source:\n"); 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t c=0; fRuleSource[c] != 0; c++) { 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("%c", fRuleSource[c]); 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\n\n"); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_USE 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// ubrk_swap - byte swap and char encoding swap of RBBI data 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------- 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *status) { 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status == NULL || U_FAILURE(*status)) { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status=U_ILLEGAL_ARGUMENT_ERROR; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Check that the data header is for for break data. 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (Header contents are defined in genbrk.cpp) 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */ 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->dataFormat[1]==0x72 && 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->dataFormat[2]==0x6b && 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->dataFormat[3]==0x20 && 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->formatVersion[0]==3 )) { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->dataFormat[0], pInfo->dataFormat[1], 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->dataFormat[2], pInfo->dataFormat[3], 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pInfo->formatVersion[0]); 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status=U_UNSUPPORTED_ERROR; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // RBBIDataHeader). This swap also conveniently gets us 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the size of the ICU d.h., which lets us locate the start 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // of the RBBI specific data. 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the RRBI Data Header, and check that it appears to be OK. 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // an int32_t with a value of 1. Starting with ICU 3.4, 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // RBBI's fDataFormat matches the dataFormat field from the 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // UDataInfo header, four int8_t bytes. The value is {3,1,0,0} 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *inBytes =(const uint8_t *)inData+headerSize; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; 35427f654740f2a26ad62a5c155af9199af9e69b889claireho if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || 35527f654740f2a26ad62a5c155af9199af9e69b889claireho rbbiDH->fFormatVersion[0] != 3 || 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status=U_UNSUPPORTED_ERROR; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Prefight operation? Just return the size 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength); 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t totalSize = headerSize + breakDataLength; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (length < 0) { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return totalSize; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Check that length passed in is consistent with length from RBBI data header. 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (length < totalSize) { 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n", 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru breakDataLength); 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status=U_INDEX_OUTOFBOUNDS_ERROR; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Swap the Data. Do the data itself first, then the RBBI Data Header, because 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // we need to reference the header to locate the data, and an 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // inplace swap of the header leaves it unusable. 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *outBytes = (uint8_t *)outData + headerSize; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes; 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t tableStartOffset; 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t tableLength; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If not swapping in place, zero out the output buffer before starting. 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Individual tables and other data items within are aligned to 8 byte boundaries 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // when originally created. Any unused space between items needs to be zero. 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (inBytes != outBytes) { 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(outBytes, 0, breakDataLength); 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Each state table begins with several 32 bit fields. Calculate the size 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in bytes of these. 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t topSize = offsetof(RBBIStateTable, fTableData); 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Forward state table. 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableStartOffset = ds->readUInt32(rbbiDH->fFTable); 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableLength = ds->readUInt32(rbbiDH->fFTableLen); 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableLength > 0) { 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset, status); 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset+topSize, status); 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Reverse state table. Same layout as forward table, above. 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableStartOffset = ds->readUInt32(rbbiDH->fRTable); 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableLength = ds->readUInt32(rbbiDH->fRTableLen); 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableLength > 0) { 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset, status); 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset+topSize, status); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Safe Forward state table. Same layout as forward table, above. 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableStartOffset = ds->readUInt32(rbbiDH->fSFTable); 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableLength = ds->readUInt32(rbbiDH->fSFTableLen); 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableLength > 0) { 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset, status); 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset+topSize, status); 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Safe Reverse state table. Same layout as forward table, above. 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableStartOffset = ds->readUInt32(rbbiDH->fSRTable); 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableLength = ds->readUInt32(rbbiDH->fSRTableLen); 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableLength > 0) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset, status); 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+tableStartOffset+topSize, status); 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Trie table for character categories 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+ds->readUInt32(rbbiDH->fTrie), status); 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Source Rules Text. It's UChar data 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen), 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+ds->readUInt32(rbbiDH->fRuleSource), status); 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table of rule status values. It's all int_32 values 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen), 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outBytes+ds->readUInt32(rbbiDH->fStatusTable), status); 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // And, last, the header. 46627f654740f2a26ad62a5c155af9199af9e69b889claireho // It is all int32_t values except for fFormataVersion, which is an array of four bytes. 46727f654740f2a26ad62a5c155af9199af9e69b889claireho // Swap the whole thing as int32_t, then re-swap the one field. 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status); 47027f654740f2a26ad62a5c155af9199af9e69b889claireho ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status); 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return totalSize; 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 477