164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru***************************************************************************
5f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius*   Copyright (C) 1999-2014 International Business Machines Corporation   *
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   and others. All rights reserved.                                      *
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru***************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbidata.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirb.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "utrie.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "udatamem.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uassert.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------------
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//   Trie access folding function.  Copied as-is from properties code in uchar.c
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------------
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFoldingOffset(uint32_t data) {
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(data&0x8000) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (int32_t)(data&0x7fff);
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//    Constructors.
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) {
51f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    init0();
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    init(data, status);
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoRBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) {
56f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    init0();
5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    init(data, status);
5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    fDontFreeData = TRUE;
5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
62f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    init0();
63f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
64f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
65f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
66f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const DataHeader *dh = udm->pHeader;
67f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    int32_t headerSize = dh->dataHeader.headerSize;
68f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (  !(headerSize >= 20 &&
69f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.isBigEndian == U_IS_BIG_ENDIAN &&
70f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.charsetFamily == U_CHARSET_FAMILY &&
71f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.dataFormat[0] == 0x42 &&  // dataFormat="Brk "
72f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.dataFormat[1] == 0x72 &&
73f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.dataFormat[2] == 0x6b &&
74f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            dh->info.dataFormat[3] == 0x20)
75f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
76f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            //       validated when checking that.
77f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        ) {
78f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        status = U_INVALID_FORMAT_ERROR;
79f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
80f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
81f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const char *dataAsBytes = reinterpret_cast<const char *>(dh);
82f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const RBBIDataHeader *rbbidh = reinterpret_cast<const RBBIDataHeader *>(dataAsBytes + headerSize);
83f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    init(rbbidh, status);
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fUDataMem = udm;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//    init().   Does most of the work of construction, shared between the
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//              constructors.
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
93f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusvoid RBBIDataWrapper::init0() {
94f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fHeader = NULL;
95f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fForwardTable = NULL;
96f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fReverseTable = NULL;
97f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fSafeFwdTable = NULL;
98f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fSafeRevTable = NULL;
99f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fRuleSource = NULL;
100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fRuleStatusTable = NULL;
101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fUDataMem = NULL;
102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fRefCount = 0;
103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    fDontFreeData = TRUE;
104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHeader = data;
11127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_INVALID_FORMAT_ERROR;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
11627f654740f2a26ad62a5c155af9199af9e69b889claireho    // Note: in ICU version 3.2 and earlier, there was a formatVersion 1
11727f654740f2a26ad62a5c155af9199af9e69b889claireho    //       that is no longer supported.  At that time fFormatVersion was
11827f654740f2a26ad62a5c155af9199af9e69b889claireho    //       an int32_t field, rather than an array of 4 bytes.
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    fDontFreeData = FALSE;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (data->fFTableLen != 0) {
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (data->fRTableLen != 0) {
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (data->fSFTableLen != 0) {
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (data->fSRTableLen != 0) {
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utrie_unserialize(&fTrie,
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       (uint8_t *)data + fHeader->fTrie,
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       fHeader->fTrieLen,
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       &status);
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fTrie.getFoldingOffset=getFoldingOffset;
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fRuleString.setTo(TRUE, fRuleSource, -1);
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_ASSERT(data->fRuleSourceLen > 0);
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fStatusMaxIdx    = data->fStatusTableLen / sizeof(int32_t);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fRefCount = 1;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char *debugEnv = getenv("U_RBBIDEBUG");
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//    Destructor.     Don't call this - use removeReference() instead.
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper::~RBBIDataWrapper() {
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_ASSERT(fRefCount == 0);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fUDataMem) {
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        udata_close(fUDataMem);
17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else if (!fDontFreeData) {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free((void *)fHeader);
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//   Operator ==    Consider two RBBIDataWrappers to be equal if they
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                  refer to the same underlying data.  Although
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                  the data wrappers are normally shared between
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                  iterator instances, it's possible to independently
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                  open the same data twice, and get two instances, which
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                  should still be ==.
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fHeader == other.fHeader) {
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fHeader->fLength != other.fHeader->fLength) {
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) {
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return FALSE;
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t  RBBIDataWrapper::hashCode() {
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fHeader->fFTableLen;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//    Reference Counting.   A single RBBIDataWrapper object is shared among
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                          however many RulesBasedBreakIterator instances are
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                          referencing the same data.
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIDataWrapper::removeReference() {
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (umtx_atomic_dec(&fRefCount) == 0) {
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete this;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIDataWrapper *RBBIDataWrapper::addReference() {
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru   umtx_atomic_inc(&fRefCount);
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru   return this;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  getRuleSourceString
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UnicodeString &RBBIDataWrapper::getRuleSourceString() const {
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fRuleString;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  print   -  debugging function to dump the runtime data tables.
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid  RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t   c;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t   s;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("   %s\n", heading);
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("State |  Acc  LA TagIx");
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);}
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIDebugPrintf("----");
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("\n");
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (table == NULL) {
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIDebugPrintf("         N U L L   T A B L E\n\n");
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (s=0; s<table->fNumStates; s++) {
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIStateTableRow *row = (RBBIStateTableRow *)
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                  (table->fTableData + (table->fRowLen * s));
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIDebugPrintf("%4d  |  %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx);
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (c=0; c<fHeader->fCatCount; c++)  {
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            RBBIDebugPrintf("%3d ", row->fNextState[c]);
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIDebugPrintf("\n");
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("\n");
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid  RBBIDataWrapper::printData() {
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("   Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                    fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("   total length of data  = %d\n", fHeader->fLength);
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("   number of character categories = %d\n\n", fHeader->fCatCount);
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTable("Forward State Transition Table", fForwardTable);
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTable("Reverse State Transition Table", fReverseTable);
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTable("Safe Forward State Transition Table", fSafeFwdTable);
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTable("Safe Reverse State Transition Table", fSafeRevTable);
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("\nOrignal Rules source:\n");
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t c=0; fRuleSource[c] != 0; c++) {
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        RBBIDebugPrintf("%c", fRuleSource[c]);
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDebugPrintf("\n\n");
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_USE
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  ubrk_swap   -  byte swap and char encoding swap of RBBI data
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//-----------------------------------------------------------------------------
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           UErrorCode *status) {
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (status == NULL || U_FAILURE(*status)) {
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *status=U_ILLEGAL_ARGUMENT_ERROR;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
    //  Check that the data header is for break data.
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //    (Header contents are defined in genbrk.cpp)
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(!(  pInfo->dataFormat[0]==0x42 &&   /* dataFormat="Brk " */
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           pInfo->dataFormat[1]==0x72 &&
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           pInfo->dataFormat[2]==0x6b &&
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           pInfo->dataFormat[3]==0x20 &&
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           pInfo->formatVersion[0]==3  )) {
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pInfo->dataFormat[0], pInfo->dataFormat[1],
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pInfo->dataFormat[2], pInfo->dataFormat[3],
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pInfo->formatVersion[0]);
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *status=U_UNSUPPORTED_ERROR;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Swap the data header.  (This is the generic ICU Data Header, not the RBBI Specific
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //                         RBBIDataHeader).  This swap also conveniently gets us
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //                         the size of the ICU d.h., which lets us locate the start
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //                         of the RBBI specific data.
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
    // Get the RBBI Data Header, and check that it appears to be OK.
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //           an int32_t with a value of 1.  Starting with ICU 3.4,
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //           RBBI's fDataFormat matches the dataFormat field from the
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t  *inBytes =(const uint8_t *)inData+headerSize;
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
35427f654740f2a26ad62a5c155af9199af9e69b889claireho    if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
35527f654740f2a26ad62a5c155af9199af9e69b889claireho        rbbiDH->fFormatVersion[0] != 3 ||
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader))
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *status=U_UNSUPPORTED_ERROR;
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Preflight operation?  Just return the size
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t totalSize = headerSize + breakDataLength;
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (length < 0) {
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return totalSize;
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Check that length passed in is consistent with length from RBBI data header.
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (length < totalSize) {
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            breakDataLength);
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *status=U_INDEX_OUTOFBOUNDS_ERROR;
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Swap the Data.  Do the data itself first, then the RBBI Data Header, because
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //                 we need to reference the header to locate the data, and an
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //                 inplace swap of the header leaves it unusable.
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t         *outBytes = (uint8_t *)outData + headerSize;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RBBIDataHeader  *outputDH = (RBBIDataHeader *)outBytes;
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t   tableStartOffset;
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t   tableLength;
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // If not swapping in place, zero out the output buffer before starting.
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //    Individual tables and other data items within are aligned to 8 byte boundaries
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //    when originally created.  Any unused space between items needs to be zero.
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (inBytes != outBytes) {
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memset(outBytes, 0, breakDataLength);
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Each state table begins with several 32 bit fields.  Calculate the size
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   in bytes of these.
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t         topSize = offsetof(RBBIStateTable, fTableData);
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Forward state table.
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableLength      = ds->readUInt32(rbbiDH->fFTableLen);
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (tableLength > 0) {
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset, status);
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset+topSize, status);
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Reverse state table.  Same layout as forward table, above.
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableLength      = ds->readUInt32(rbbiDH->fRTableLen);
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (tableLength > 0) {
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset, status);
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset+topSize, status);
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Safe Forward state table.  Same layout as forward table, above.
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableLength      = ds->readUInt32(rbbiDH->fSFTableLen);
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (tableLength > 0) {
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset, status);
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset+topSize, status);
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Safe Reverse state table.  Same layout as forward table, above.
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    tableLength      = ds->readUInt32(rbbiDH->fSRTableLen);
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (tableLength > 0) {
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset, status);
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+tableStartOffset+topSize, status);
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Trie table for character categories
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            outBytes+ds->readUInt32(rbbiDH->fTrie), status);
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Source Rules Text.  It's UChar data
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Table of rule status values.  It's all int32_t values
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // And, last, the header.
46627f654740f2a26ad62a5c155af9199af9e69b889claireho    //   It is all int32_t values except for fFormatVersion, which is an array of four bytes.
46727f654740f2a26ad62a5c155af9199af9e69b889claireho    //   Swap the whole thing as int32_t, then re-swap the one field.
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
47027f654740f2a26ad62a5c155af9199af9e69b889claireho    ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return totalSize;
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
477