1/*
2*******************************************************************************
3*
4*   Copyright (C) 2005-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  swapimpl.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005may05
14*   created by: Markus W. Scherer
15*
*   Data file swapping functions moved here from the common library
*   because some data is hardcoded in ICU4C and need not be swapped any more.
18*   Moving the functions here simplifies testing (for code coverage) because
19*   we need not jump through hoops (like adding snapshots of these files
20*   to testdata).
21*
22*   The declarations for these functions remain in the internal header files
23*   in icu/source/common/
24*/
25
26#include "unicode/utypes.h"
27#include "unicode/putil.h"
28#include "unicode/udata.h"
29
30/* Explicit include statement for std_string.h is needed
31 * for compilation on certain platforms. (e.g. AIX/VACPP)
32 */
33#include "unicode/std_string.h"
34
35#include "cmemory.h"
36#include "cstring.h"
37#include "uinvchar.h"
38#include "uassert.h"
39#include "uarrsort.h"
40#include "ucmndata.h"
41#include "udataswp.h"
42
43/* swapping implementations in common */
44
45#include "uresdata.h"
46#include "ucnv_io.h"
47#include "uprops.h"
48#include "ucase.h"
49#include "ubidi_props.h"
50#include "ucol_swp.h"
51#include "ucnv_bld.h"
52#include "unormimp.h"
53#include "normalizer2impl.h"
54#include "sprpimpl.h"
55#include "propname.h"
56#include "rbbidata.h"
57#include "triedict.h"
58#include "utrie2.h"
59
60/* swapping implementations in i18n */
61
62#if !UCONFIG_NO_NORMALIZATION
63#include "uspoof_impl.h"
64#endif
65
66
67/* definitions */
68
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression wherever it is used (for example after a unary operator
 * such as sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
70
71/* Unicode property (value) aliases data swapping --------------------------- */
72
73static int32_t U_CALLCONV
74upname_swap(const UDataSwapper *ds,
75            const void *inData, int32_t length, void *outData,
76            UErrorCode *pErrorCode) {
77    /* udata_swapDataHeader checks the arguments */
78    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
79    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
80        return 0;
81    }
82
83    /* check data format and format version */
84    const UDataInfo *pInfo=
85        reinterpret_cast<const UDataInfo *>(
86            reinterpret_cast<const char *>(inData)+4);
87    if(!(
88        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
89        pInfo->dataFormat[1]==0x6e &&
90        pInfo->dataFormat[2]==0x61 &&
91        pInfo->dataFormat[3]==0x6d &&
92        pInfo->formatVersion[0]==2
93    )) {
94        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
95                         pInfo->dataFormat[0], pInfo->dataFormat[1],
96                         pInfo->dataFormat[2], pInfo->dataFormat[3],
97                         pInfo->formatVersion[0]);
98        *pErrorCode=U_UNSUPPORTED_ERROR;
99        return 0;
100    }
101
102    const uint8_t *inBytes=reinterpret_cast<const uint8_t *>(inData)+headerSize;
103    uint8_t *outBytes=reinterpret_cast<uint8_t *>(outData)+headerSize;
104
105    if(length>=0) {
106        length-=headerSize;
107        // formatVersion 2 initially has indexes[8], 32 bytes.
108        if(length<32) {
109            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
110                             (int)length);
111            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
112            return 0;
113        }
114    }
115
116    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
117    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
118    if(length>=0) {
119        if(length<totalSize) {
120            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
121                             "for pnames.icu\n",
122                             (int)length, (int)totalSize);
123            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
124            return 0;
125        }
126
127        int32_t numBytesIndexesAndValueMaps=
128            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
129
130        // Swap the indexes[] and the valueMaps[].
131        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
132
133        // Copy the rest of the data.
134        if(inBytes!=outBytes) {
135            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
136                        inBytes+numBytesIndexesAndValueMaps,
137                        totalSize-numBytesIndexesAndValueMaps);
138        }
139
140        // We need not swap anything else:
141        //
142        // The ByteTries are already byte-serialized, and are fixed on ASCII.
143        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
144        // while matching.)
145        //
146        // The name groups are mostly invariant characters, but since we only
147        // generate, and keep in subversion, ASCII versions of pnames.icu,
148        // and since only ICU4J uses the pnames.icu data file
149        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
150        // we just copy those bytes too.
151    }
152
153    return headerSize+totalSize;
154}
155
156/* Unicode properties data swapping ----------------------------------------- */
157
158static int32_t U_CALLCONV
159uprops_swap(const UDataSwapper *ds,
160            const void *inData, int32_t length, void *outData,
161            UErrorCode *pErrorCode) {
162    const UDataInfo *pInfo;
163    int32_t headerSize, i;
164
165    int32_t dataIndexes[UPROPS_INDEX_COUNT];
166    const int32_t *inData32;
167
168    /* udata_swapDataHeader checks the arguments */
169    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
170    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
171        return 0;
172    }
173
174    /* check data format and format version */
175    pInfo=(const UDataInfo *)((const char *)inData+4);
176    if(!(
177        pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
178        pInfo->dataFormat[1]==0x50 &&
179        pInfo->dataFormat[2]==0x72 &&
180        pInfo->dataFormat[3]==0x6f &&
181        (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
182        (pInfo->formatVersion[0]>=7 ||
183            (pInfo->formatVersion[2]==UTRIE_SHIFT &&
184             pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
185    )) {
186        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
187                         pInfo->dataFormat[0], pInfo->dataFormat[1],
188                         pInfo->dataFormat[2], pInfo->dataFormat[3],
189                         pInfo->formatVersion[0]);
190        *pErrorCode=U_UNSUPPORTED_ERROR;
191        return 0;
192    }
193
194    /* the properties file must contain at least the indexes array */
195    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
196        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
197                         length-headerSize);
198        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
199        return 0;
200    }
201
202    /* read the indexes */
203    inData32=(const int32_t *)((const char *)inData+headerSize);
204    for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
205        dataIndexes[i]=udata_readInt32(ds, inData32[i]);
206    }
207
208    /*
209     * comments are copied from the data format description in genprops/store.c
210     * indexes[] constants are in uprops.h
211     */
212    int32_t dataTop;
213    if(length>=0) {
214        int32_t *outData32;
215
216        /*
217         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
218         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
219         * has the top of the last item.
220         */
221        for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
222
223        if((length-headerSize)<(4*dataTop)) {
224            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
225                             length-headerSize);
226            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
227            return 0;
228        }
229
230        outData32=(int32_t *)((char *)outData+headerSize);
231
232        /* copy everything for inaccessible data (padding) */
233        if(inData32!=outData32) {
234            uprv_memcpy(outData32, inData32, 4*dataTop);
235        }
236
237        /* swap the indexes[16] */
238        ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
239
240        /*
241         * swap the main properties UTrie
242         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
243         */
244        utrie2_swapAnyVersion(ds,
245            inData32+UPROPS_INDEX_COUNT,
246            4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
247            outData32+UPROPS_INDEX_COUNT,
248            pErrorCode);
249
250        /*
251         * swap the properties and exceptions words
252         * P  const uint32_t props32[i1-i0];
253         * E  const uint32_t exceptions[i2-i1];
254         */
255        ds->swapArray32(ds,
256            inData32+dataIndexes[UPROPS_PROPS32_INDEX],
257            4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
258            outData32+dataIndexes[UPROPS_PROPS32_INDEX],
259            pErrorCode);
260
261        /*
262         * swap the UChars
263         * U  const UChar uchars[2*(i3-i2)];
264         */
265        ds->swapArray16(ds,
266            inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
267            4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
268            outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
269            pErrorCode);
270
271        /*
272         * swap the additional UTrie
273         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
274         */
275        utrie2_swapAnyVersion(ds,
276            inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
277            4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
278            outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
279            pErrorCode);
280
281        /*
282         * swap the properties vectors
283         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
284         */
285        ds->swapArray32(ds,
286            inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
287            4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
288            outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
289            pErrorCode);
290
291        // swap the Script_Extensions data
292        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
293        ds->swapArray16(ds,
294            inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
295            4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
296            outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
297            pErrorCode);
298    }
299
300    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
301    return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
302}
303
304/* Unicode case mapping data swapping --------------------------------------- */
305
306static int32_t U_CALLCONV
307ucase_swap(const UDataSwapper *ds,
308           const void *inData, int32_t length, void *outData,
309           UErrorCode *pErrorCode) {
310    const UDataInfo *pInfo;
311    int32_t headerSize;
312
313    const uint8_t *inBytes;
314    uint8_t *outBytes;
315
316    const int32_t *inIndexes;
317    int32_t indexes[16];
318
319    int32_t i, offset, count, size;
320
321    /* udata_swapDataHeader checks the arguments */
322    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
323    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
324        return 0;
325    }
326
327    /* check data format and format version */
328    pInfo=(const UDataInfo *)((const char *)inData+4);
329    if(!(
330        pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
331        pInfo->dataFormat[1]==UCASE_FMT_1 &&
332        pInfo->dataFormat[2]==UCASE_FMT_2 &&
333        pInfo->dataFormat[3]==UCASE_FMT_3 &&
334        ((pInfo->formatVersion[0]==1 &&
335          pInfo->formatVersion[2]==UTRIE_SHIFT &&
336          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
337         pInfo->formatVersion[0]==2)
338    )) {
339        udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
340                         pInfo->dataFormat[0], pInfo->dataFormat[1],
341                         pInfo->dataFormat[2], pInfo->dataFormat[3],
342                         pInfo->formatVersion[0]);
343        *pErrorCode=U_UNSUPPORTED_ERROR;
344        return 0;
345    }
346
347    inBytes=(const uint8_t *)inData+headerSize;
348    outBytes=(uint8_t *)outData+headerSize;
349
350    inIndexes=(const int32_t *)inBytes;
351
352    if(length>=0) {
353        length-=headerSize;
354        if(length<16*4) {
355            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
356                             length);
357            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
358            return 0;
359        }
360    }
361
362    /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
363    for(i=0; i<16; ++i) {
364        indexes[i]=udata_readInt32(ds, inIndexes[i]);
365    }
366
367    /* get the total length of the data */
368    size=indexes[UCASE_IX_LENGTH];
369
370    if(length>=0) {
371        if(length<size) {
372            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
373                             length);
374            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
375            return 0;
376        }
377
378        /* copy the data for inaccessible bytes */
379        if(inBytes!=outBytes) {
380            uprv_memcpy(outBytes, inBytes, size);
381        }
382
383        offset=0;
384
385        /* swap the int32_t indexes[] */
386        count=indexes[UCASE_IX_INDEX_TOP]*4;
387        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
388        offset+=count;
389
390        /* swap the UTrie */
391        count=indexes[UCASE_IX_TRIE_SIZE];
392        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
393        offset+=count;
394
395        /* swap the uint16_t exceptions[] and unfold[] */
396        count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
397        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
398        offset+=count;
399
400        U_ASSERT(offset==size);
401    }
402
403    return headerSize+size;
404}
405
406/* Unicode bidi/shaping data swapping --------------------------------------- */
407
408static int32_t U_CALLCONV
409ubidi_swap(const UDataSwapper *ds,
410           const void *inData, int32_t length, void *outData,
411           UErrorCode *pErrorCode) {
412    const UDataInfo *pInfo;
413    int32_t headerSize;
414
415    const uint8_t *inBytes;
416    uint8_t *outBytes;
417
418    const int32_t *inIndexes;
419    int32_t indexes[16];
420
421    int32_t i, offset, count, size;
422
423    /* udata_swapDataHeader checks the arguments */
424    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
425    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
426        return 0;
427    }
428
429    /* check data format and format version */
430    pInfo=(const UDataInfo *)((const char *)inData+4);
431    if(!(
432        pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
433        pInfo->dataFormat[1]==UBIDI_FMT_1 &&
434        pInfo->dataFormat[2]==UBIDI_FMT_2 &&
435        pInfo->dataFormat[3]==UBIDI_FMT_3 &&
436        ((pInfo->formatVersion[0]==1 &&
437          pInfo->formatVersion[2]==UTRIE_SHIFT &&
438          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
439         pInfo->formatVersion[0]==2)
440    )) {
441        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
442                         pInfo->dataFormat[0], pInfo->dataFormat[1],
443                         pInfo->dataFormat[2], pInfo->dataFormat[3],
444                         pInfo->formatVersion[0]);
445        *pErrorCode=U_UNSUPPORTED_ERROR;
446        return 0;
447    }
448
449    inBytes=(const uint8_t *)inData+headerSize;
450    outBytes=(uint8_t *)outData+headerSize;
451
452    inIndexes=(const int32_t *)inBytes;
453
454    if(length>=0) {
455        length-=headerSize;
456        if(length<16*4) {
457            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
458                             length);
459            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
460            return 0;
461        }
462    }
463
464    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
465    for(i=0; i<16; ++i) {
466        indexes[i]=udata_readInt32(ds, inIndexes[i]);
467    }
468
469    /* get the total length of the data */
470    size=indexes[UBIDI_IX_LENGTH];
471
472    if(length>=0) {
473        if(length<size) {
474            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
475                             length);
476            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
477            return 0;
478        }
479
480        /* copy the data for inaccessible bytes */
481        if(inBytes!=outBytes) {
482            uprv_memcpy(outBytes, inBytes, size);
483        }
484
485        offset=0;
486
487        /* swap the int32_t indexes[] */
488        count=indexes[UBIDI_IX_INDEX_TOP]*4;
489        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
490        offset+=count;
491
492        /* swap the UTrie */
493        count=indexes[UBIDI_IX_TRIE_SIZE];
494        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
495        offset+=count;
496
497        /* swap the uint32_t mirrors[] */
498        count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
499        ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
500        offset+=count;
501
502        /* just skip the uint8_t jgArray[] */
503        count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
504        offset+=count;
505
506        U_ASSERT(offset==size);
507    }
508
509    return headerSize+size;
510}
511
512/* Unicode normalization data swapping -------------------------------------- */
513
514#if !UCONFIG_NO_NORMALIZATION
515
516static int32_t U_CALLCONV
517unorm_swap(const UDataSwapper *ds,
518           const void *inData, int32_t length, void *outData,
519           UErrorCode *pErrorCode) {
520    const UDataInfo *pInfo;
521    int32_t headerSize;
522
523    const uint8_t *inBytes;
524    uint8_t *outBytes;
525
526    const int32_t *inIndexes;
527    int32_t indexes[32];
528
529    int32_t i, offset, count, size;
530
531    /* udata_swapDataHeader checks the arguments */
532    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
533    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
534        return 0;
535    }
536
537    /* check data format and format version */
538    pInfo=(const UDataInfo *)((const char *)inData+4);
539    if(!(
540        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
541        pInfo->dataFormat[1]==0x6f &&
542        pInfo->dataFormat[2]==0x72 &&
543        pInfo->dataFormat[3]==0x6d &&
544        pInfo->formatVersion[0]==2
545    )) {
546        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
547                         pInfo->dataFormat[0], pInfo->dataFormat[1],
548                         pInfo->dataFormat[2], pInfo->dataFormat[3],
549                         pInfo->formatVersion[0]);
550        *pErrorCode=U_UNSUPPORTED_ERROR;
551        return 0;
552    }
553
554    inBytes=(const uint8_t *)inData+headerSize;
555    outBytes=(uint8_t *)outData+headerSize;
556
557    inIndexes=(const int32_t *)inBytes;
558
559    if(length>=0) {
560        length-=headerSize;
561        if(length<32*4) {
562            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
563                             length);
564            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
565            return 0;
566        }
567    }
568
569    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
570    for(i=0; i<32; ++i) {
571        indexes[i]=udata_readInt32(ds, inIndexes[i]);
572    }
573
574    /* calculate the total length of the data */
575    size=
576        32*4+ /* size of indexes[] */
577        indexes[_NORM_INDEX_TRIE_SIZE]+
578        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
579        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
580        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
581        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
582        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
583
584    if(length>=0) {
585        if(length<size) {
586            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
587                             length);
588            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
589            return 0;
590        }
591
592        /* copy the data for inaccessible bytes */
593        if(inBytes!=outBytes) {
594            uprv_memcpy(outBytes, inBytes, size);
595        }
596
597        offset=0;
598
599        /* swap the indexes[] */
600        count=32*4;
601        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
602        offset+=count;
603
604        /* swap the main UTrie */
605        count=indexes[_NORM_INDEX_TRIE_SIZE];
606        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
607        offset+=count;
608
609        /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
610        count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
611        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
612        offset+=count;
613
614        /* swap the FCD UTrie */
615        count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
616        if(count!=0) {
617            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
618            offset+=count;
619        }
620
621        /* swap the aux UTrie */
622        count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
623        if(count!=0) {
624            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
625            offset+=count;
626        }
627
628        /* swap the uint16_t combiningTable[] */
629        count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
630        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
631        offset+=count;
632    }
633
634    return headerSize+size;
635}
636
637#endif
638
639/* Swap 'Test' data from gentest */
640static int32_t U_CALLCONV
641test_swap(const UDataSwapper *ds,
642           const void *inData, int32_t length, void *outData,
643           UErrorCode *pErrorCode) {
644    const UDataInfo *pInfo;
645    int32_t headerSize;
646
647    const uint8_t *inBytes;
648    uint8_t *outBytes;
649
650    int32_t offset;
651
652    /* udata_swapDataHeader checks the arguments */
653    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
654    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
655        udata_printError(ds, "test_swap(): data header swap failed %s\n", u_errorName(*pErrorCode));
656        return 0;
657    }
658
659    /* check data format and format version */
660    pInfo=(const UDataInfo *)((const char *)inData+4);
661    if(!(
662        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
663        pInfo->dataFormat[1]==0x65 &&
664        pInfo->dataFormat[2]==0x73 &&
665        pInfo->dataFormat[3]==0x74 &&
666        pInfo->formatVersion[0]==1
667    )) {
668        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
669                         pInfo->dataFormat[0], pInfo->dataFormat[1],
670                         pInfo->dataFormat[2], pInfo->dataFormat[3],
671                         pInfo->formatVersion[0]);
672        *pErrorCode=U_UNSUPPORTED_ERROR;
673        return 0;
674    }
675
676    inBytes=(const uint8_t *)inData+headerSize;
677    outBytes=(uint8_t *)outData+headerSize;
678
679    int32_t size16 = 2; // 16bit plus padding
680    int32_t sizeStr = 5; // 4 char inv-str plus null
681    int32_t size = size16 + sizeStr;
682
683    if(length>=0) {
684        if(length<size) {
685            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
686                             length, size);
687            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
688            return 0;
689        }
690
691	offset =0;
692	/* swap a 1 entry array */
693        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
694	offset+=size16;
695	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
696    }
697
698    return headerSize+size;
699}
700
701/* swap any data (except a .dat package) ------------------------------------ */
702
703static const struct {
704    uint8_t dataFormat[4];
705    UDataSwapFn *swapFn;
706} swapFns[]={
707    { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
708#if !UCONFIG_NO_LEGACY_CONVERSION
709    { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
710#endif
711#if !UCONFIG_NO_CONVERSION
712    { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
713#endif
714#if !UCONFIG_NO_IDNA
715    { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
716#endif
717    /* insert data formats here, descending by expected frequency of occurrence */
718    { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
719
720    { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
721                                  ucase_swap },         /* dataFormat="cAsE" */
722
723    { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
724                                  ubidi_swap },         /* dataFormat="BiDi" */
725
726#if !UCONFIG_NO_NORMALIZATION
727    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
728    { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
729#endif
730#if !UCONFIG_NO_COLLATION
731    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
732    { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
733#endif
734#if !UCONFIG_NO_BREAK_ITERATION
735    { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
736    { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap },      /* dataFormat="TrDc " */
737#endif
738    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
739    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
740#if !UCONFIG_NO_NORMALIZATION
741    { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
742#endif
743    { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
744};
745
746U_CAPI int32_t U_EXPORT2
747udata_swap(const UDataSwapper *ds,
748           const void *inData, int32_t length, void *outData,
749           UErrorCode *pErrorCode) {
750    char dataFormatChars[4];
751    const UDataInfo *pInfo;
752    int32_t headerSize, i, swappedLength;
753
754    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
755        return 0;
756    }
757
758    /*
759     * Preflight the header first; checks for illegal arguments, too.
760     * Do not swap the header right away because the format-specific swapper
761     * will swap it, get the headerSize again, and also use the header
762     * information. Otherwise we would have to pass some of the information
763     * and not be able to use the UDataSwapFn signature.
764     */
765    headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
766
767    /*
768     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
769     * then we could check here for further known magic values and structures.
770     */
771    if(U_FAILURE(*pErrorCode)) {
772        return 0; /* the data format was not recognized */
773    }
774
775    pInfo=(const UDataInfo *)((const char *)inData+4);
776
777    {
778        /* convert the data format from ASCII to Unicode to the system charset */
779        UChar u[4]={
780             pInfo->dataFormat[0], pInfo->dataFormat[1],
781             pInfo->dataFormat[2], pInfo->dataFormat[3]
782        };
783
784        if(uprv_isInvariantUString(u, 4)) {
785            u_UCharsToChars(u, dataFormatChars, 4);
786        } else {
787            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
788        }
789    }
790
791    /* dispatch to the swap function for the dataFormat */
792    for(i=0; i<LENGTHOF(swapFns); ++i) {
793        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
794            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
795
796            if(U_FAILURE(*pErrorCode)) {
797                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
798                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
799                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
800                                 dataFormatChars[0], dataFormatChars[1],
801                                 dataFormatChars[2], dataFormatChars[3],
802                                 u_errorName(*pErrorCode));
803            } else if(swappedLength<(length-15)) {
804                /* swapped less than expected */
805                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
806                                 swappedLength, length,
807                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
808                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
809                                 dataFormatChars[0], dataFormatChars[1],
810                                 dataFormatChars[2], dataFormatChars[3],
811                                 u_errorName(*pErrorCode));
812            }
813
814            return swappedLength;
815        }
816    }
817
818    /* the dataFormat was not recognized */
819    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
820                     pInfo->dataFormat[0], pInfo->dataFormat[1],
821                     pInfo->dataFormat[2], pInfo->dataFormat[3],
822                     dataFormatChars[0], dataFormatChars[1],
823                     dataFormatChars[2], dataFormatChars[3]);
824
825    *pErrorCode=U_UNSUPPORTED_ERROR;
826    return 0;
827}
828