1/*
2*******************************************************************************
3*
4*   Copyright (C) 2005-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  swapimpl.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005may05
14*   created by: Markus W. Scherer
15*
16*   Data file swapping functions moved here from the common library
*   because some data is hardcoded in ICU4C and need not be swapped any more.
18*   Moving the functions here simplifies testing (for code coverage) because
19*   we need not jump through hoops (like adding snapshots of these files
20*   to testdata).
21*
22*   The declarations for these functions remain in the internal header files
23*   in icu/source/common/
24*/
25
26#include "unicode/utypes.h"
27#include "unicode/putil.h"
28#include "unicode/udata.h"
29
30/* Explicit include statement for std_string.h is needed
31 * for compilation on certain platforms. (e.g. AIX/VACPP)
32 */
33#include "unicode/std_string.h"
34
35#include "cmemory.h"
36#include "cstring.h"
37#include "uinvchar.h"
38#include "uassert.h"
39#include "uarrsort.h"
40#include "ucmndata.h"
41#include "udataswp.h"
42
43/* swapping implementations in common */
44
45#include "uresdata.h"
46#include "ucnv_io.h"
47#include "uprops.h"
48#include "ucase.h"
49#include "ubidi_props.h"
50#include "ucol_swp.h"
51#include "ucnv_bld.h"
52#include "unormimp.h"
53#include "normalizer2impl.h"
54#include "sprpimpl.h"
55#include "propname.h"
56#include "rbbidata.h"
57#include "utrie2.h"
58#include "dictionarydata.h"
59
60/* swapping implementations in i18n */
61
62#if !UCONFIG_NO_NORMALIZATION
63#include "uspoof_impl.h"
64#endif
65
66U_NAMESPACE_USE
67
68/* definitions */
69
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
71
72/* Unicode property (value) aliases data swapping --------------------------- */
73
74static int32_t U_CALLCONV
75upname_swap(const UDataSwapper *ds,
76            const void *inData, int32_t length, void *outData,
77            UErrorCode *pErrorCode) {
78    /* udata_swapDataHeader checks the arguments */
79    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
80    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
81        return 0;
82    }
83
84    /* check data format and format version */
85    const UDataInfo *pInfo=
86        reinterpret_cast<const UDataInfo *>(
87            static_cast<const char *>(inData)+4);
88    if(!(
89        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
90        pInfo->dataFormat[1]==0x6e &&
91        pInfo->dataFormat[2]==0x61 &&
92        pInfo->dataFormat[3]==0x6d &&
93        pInfo->formatVersion[0]==2
94    )) {
95        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
96                         pInfo->dataFormat[0], pInfo->dataFormat[1],
97                         pInfo->dataFormat[2], pInfo->dataFormat[3],
98                         pInfo->formatVersion[0]);
99        *pErrorCode=U_UNSUPPORTED_ERROR;
100        return 0;
101    }
102
103    const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
104    uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
105
106    if(length>=0) {
107        length-=headerSize;
108        // formatVersion 2 initially has indexes[8], 32 bytes.
109        if(length<32) {
110            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
111                             (int)length);
112            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
113            return 0;
114        }
115    }
116
117    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
118    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
119    if(length>=0) {
120        if(length<totalSize) {
121            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
122                             "for pnames.icu\n",
123                             (int)length, (int)totalSize);
124            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
125            return 0;
126        }
127
128        int32_t numBytesIndexesAndValueMaps=
129            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
130
131        // Swap the indexes[] and the valueMaps[].
132        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
133
134        // Copy the rest of the data.
135        if(inBytes!=outBytes) {
136            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
137                        inBytes+numBytesIndexesAndValueMaps,
138                        totalSize-numBytesIndexesAndValueMaps);
139        }
140
141        // We need not swap anything else:
142        //
143        // The ByteTries are already byte-serialized, and are fixed on ASCII.
144        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
145        // while matching.)
146        //
147        // The name groups are mostly invariant characters, but since we only
148        // generate, and keep in subversion, ASCII versions of pnames.icu,
149        // and since only ICU4J uses the pnames.icu data file
150        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
151        // we just copy those bytes too.
152    }
153
154    return headerSize+totalSize;
155}
156
157/* Unicode properties data swapping ----------------------------------------- */
158
159static int32_t U_CALLCONV
160uprops_swap(const UDataSwapper *ds,
161            const void *inData, int32_t length, void *outData,
162            UErrorCode *pErrorCode) {
163    const UDataInfo *pInfo;
164    int32_t headerSize, i;
165
166    int32_t dataIndexes[UPROPS_INDEX_COUNT];
167    const int32_t *inData32;
168
169    /* udata_swapDataHeader checks the arguments */
170    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
171    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
172        return 0;
173    }
174
175    /* check data format and format version */
176    pInfo=(const UDataInfo *)((const char *)inData+4);
177    if(!(
178        pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
179        pInfo->dataFormat[1]==0x50 &&
180        pInfo->dataFormat[2]==0x72 &&
181        pInfo->dataFormat[3]==0x6f &&
182        (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
183        (pInfo->formatVersion[0]>=7 ||
184            (pInfo->formatVersion[2]==UTRIE_SHIFT &&
185             pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
186    )) {
187        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
188                         pInfo->dataFormat[0], pInfo->dataFormat[1],
189                         pInfo->dataFormat[2], pInfo->dataFormat[3],
190                         pInfo->formatVersion[0]);
191        *pErrorCode=U_UNSUPPORTED_ERROR;
192        return 0;
193    }
194
195    /* the properties file must contain at least the indexes array */
196    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
197        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
198                         length-headerSize);
199        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
200        return 0;
201    }
202
203    /* read the indexes */
204    inData32=(const int32_t *)((const char *)inData+headerSize);
205    for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
206        dataIndexes[i]=udata_readInt32(ds, inData32[i]);
207    }
208
209    /*
210     * comments are copied from the data format description in genprops/store.c
211     * indexes[] constants are in uprops.h
212     */
213    int32_t dataTop;
214    if(length>=0) {
215        int32_t *outData32;
216
217        /*
218         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
219         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
220         * has the top of the last item.
221         */
222        for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
223
224        if((length-headerSize)<(4*dataTop)) {
225            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
226                             length-headerSize);
227            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
228            return 0;
229        }
230
231        outData32=(int32_t *)((char *)outData+headerSize);
232
233        /* copy everything for inaccessible data (padding) */
234        if(inData32!=outData32) {
235            uprv_memcpy(outData32, inData32, 4*dataTop);
236        }
237
238        /* swap the indexes[16] */
239        ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
240
241        /*
242         * swap the main properties UTrie
243         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
244         */
245        utrie2_swapAnyVersion(ds,
246            inData32+UPROPS_INDEX_COUNT,
247            4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
248            outData32+UPROPS_INDEX_COUNT,
249            pErrorCode);
250
251        /*
252         * swap the properties and exceptions words
253         * P  const uint32_t props32[i1-i0];
254         * E  const uint32_t exceptions[i2-i1];
255         */
256        ds->swapArray32(ds,
257            inData32+dataIndexes[UPROPS_PROPS32_INDEX],
258            4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
259            outData32+dataIndexes[UPROPS_PROPS32_INDEX],
260            pErrorCode);
261
262        /*
263         * swap the UChars
264         * U  const UChar uchars[2*(i3-i2)];
265         */
266        ds->swapArray16(ds,
267            inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
268            4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
269            outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
270            pErrorCode);
271
272        /*
273         * swap the additional UTrie
274         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
275         */
276        utrie2_swapAnyVersion(ds,
277            inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
278            4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
279            outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
280            pErrorCode);
281
282        /*
283         * swap the properties vectors
284         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
285         */
286        ds->swapArray32(ds,
287            inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
288            4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
289            outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
290            pErrorCode);
291
292        // swap the Script_Extensions data
293        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
294        ds->swapArray16(ds,
295            inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
296            4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
297            outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
298            pErrorCode);
299    }
300
301    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
302    return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
303}
304
305/* Unicode case mapping data swapping --------------------------------------- */
306
307static int32_t U_CALLCONV
308ucase_swap(const UDataSwapper *ds,
309           const void *inData, int32_t length, void *outData,
310           UErrorCode *pErrorCode) {
311    const UDataInfo *pInfo;
312    int32_t headerSize;
313
314    const uint8_t *inBytes;
315    uint8_t *outBytes;
316
317    const int32_t *inIndexes;
318    int32_t indexes[16];
319
320    int32_t i, offset, count, size;
321
322    /* udata_swapDataHeader checks the arguments */
323    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
324    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
325        return 0;
326    }
327
328    /* check data format and format version */
329    pInfo=(const UDataInfo *)((const char *)inData+4);
330    if(!(
331        pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
332        pInfo->dataFormat[1]==UCASE_FMT_1 &&
333        pInfo->dataFormat[2]==UCASE_FMT_2 &&
334        pInfo->dataFormat[3]==UCASE_FMT_3 &&
335        ((pInfo->formatVersion[0]==1 &&
336          pInfo->formatVersion[2]==UTRIE_SHIFT &&
337          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
338         pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
339    )) {
340        udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
341                         pInfo->dataFormat[0], pInfo->dataFormat[1],
342                         pInfo->dataFormat[2], pInfo->dataFormat[3],
343                         pInfo->formatVersion[0]);
344        *pErrorCode=U_UNSUPPORTED_ERROR;
345        return 0;
346    }
347
348    inBytes=(const uint8_t *)inData+headerSize;
349    outBytes=(uint8_t *)outData+headerSize;
350
351    inIndexes=(const int32_t *)inBytes;
352
353    if(length>=0) {
354        length-=headerSize;
355        if(length<16*4) {
356            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
357                             length);
358            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
359            return 0;
360        }
361    }
362
363    /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
364    for(i=0; i<16; ++i) {
365        indexes[i]=udata_readInt32(ds, inIndexes[i]);
366    }
367
368    /* get the total length of the data */
369    size=indexes[UCASE_IX_LENGTH];
370
371    if(length>=0) {
372        if(length<size) {
373            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
374                             length);
375            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
376            return 0;
377        }
378
379        /* copy the data for inaccessible bytes */
380        if(inBytes!=outBytes) {
381            uprv_memcpy(outBytes, inBytes, size);
382        }
383
384        offset=0;
385
386        /* swap the int32_t indexes[] */
387        count=indexes[UCASE_IX_INDEX_TOP]*4;
388        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
389        offset+=count;
390
391        /* swap the UTrie */
392        count=indexes[UCASE_IX_TRIE_SIZE];
393        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
394        offset+=count;
395
396        /* swap the uint16_t exceptions[] and unfold[] */
397        count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
398        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
399        offset+=count;
400
401        U_ASSERT(offset==size);
402    }
403
404    return headerSize+size;
405}
406
407/* Unicode bidi/shaping data swapping --------------------------------------- */
408
409static int32_t U_CALLCONV
410ubidi_swap(const UDataSwapper *ds,
411           const void *inData, int32_t length, void *outData,
412           UErrorCode *pErrorCode) {
413    const UDataInfo *pInfo;
414    int32_t headerSize;
415
416    const uint8_t *inBytes;
417    uint8_t *outBytes;
418
419    const int32_t *inIndexes;
420    int32_t indexes[16];
421
422    int32_t i, offset, count, size;
423
424    /* udata_swapDataHeader checks the arguments */
425    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
426    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
427        return 0;
428    }
429
430    /* check data format and format version */
431    pInfo=(const UDataInfo *)((const char *)inData+4);
432    if(!(
433        pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
434        pInfo->dataFormat[1]==UBIDI_FMT_1 &&
435        pInfo->dataFormat[2]==UBIDI_FMT_2 &&
436        pInfo->dataFormat[3]==UBIDI_FMT_3 &&
437        ((pInfo->formatVersion[0]==1 &&
438          pInfo->formatVersion[2]==UTRIE_SHIFT &&
439          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
440         pInfo->formatVersion[0]==2)
441    )) {
442        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
443                         pInfo->dataFormat[0], pInfo->dataFormat[1],
444                         pInfo->dataFormat[2], pInfo->dataFormat[3],
445                         pInfo->formatVersion[0]);
446        *pErrorCode=U_UNSUPPORTED_ERROR;
447        return 0;
448    }
449
450    inBytes=(const uint8_t *)inData+headerSize;
451    outBytes=(uint8_t *)outData+headerSize;
452
453    inIndexes=(const int32_t *)inBytes;
454
455    if(length>=0) {
456        length-=headerSize;
457        if(length<16*4) {
458            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
459                             length);
460            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
461            return 0;
462        }
463    }
464
465    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
466    for(i=0; i<16; ++i) {
467        indexes[i]=udata_readInt32(ds, inIndexes[i]);
468    }
469
470    /* get the total length of the data */
471    size=indexes[UBIDI_IX_LENGTH];
472
473    if(length>=0) {
474        if(length<size) {
475            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
476                             length);
477            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
478            return 0;
479        }
480
481        /* copy the data for inaccessible bytes */
482        if(inBytes!=outBytes) {
483            uprv_memcpy(outBytes, inBytes, size);
484        }
485
486        offset=0;
487
488        /* swap the int32_t indexes[] */
489        count=indexes[UBIDI_IX_INDEX_TOP]*4;
490        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
491        offset+=count;
492
493        /* swap the UTrie */
494        count=indexes[UBIDI_IX_TRIE_SIZE];
495        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
496        offset+=count;
497
498        /* swap the uint32_t mirrors[] */
499        count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
500        ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
501        offset+=count;
502
503        /* just skip the uint8_t jgArray[] */
504        count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
505        offset+=count;
506
507        U_ASSERT(offset==size);
508    }
509
510    return headerSize+size;
511}
512
513/* Unicode normalization data swapping -------------------------------------- */
514
515#if !UCONFIG_NO_NORMALIZATION
516
517static int32_t U_CALLCONV
518unorm_swap(const UDataSwapper *ds,
519           const void *inData, int32_t length, void *outData,
520           UErrorCode *pErrorCode) {
521    const UDataInfo *pInfo;
522    int32_t headerSize;
523
524    const uint8_t *inBytes;
525    uint8_t *outBytes;
526
527    const int32_t *inIndexes;
528    int32_t indexes[32];
529
530    int32_t i, offset, count, size;
531
532    /* udata_swapDataHeader checks the arguments */
533    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
534    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
535        return 0;
536    }
537
538    /* check data format and format version */
539    pInfo=(const UDataInfo *)((const char *)inData+4);
540    if(!(
541        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
542        pInfo->dataFormat[1]==0x6f &&
543        pInfo->dataFormat[2]==0x72 &&
544        pInfo->dataFormat[3]==0x6d &&
545        pInfo->formatVersion[0]==2
546    )) {
547        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
548                         pInfo->dataFormat[0], pInfo->dataFormat[1],
549                         pInfo->dataFormat[2], pInfo->dataFormat[3],
550                         pInfo->formatVersion[0]);
551        *pErrorCode=U_UNSUPPORTED_ERROR;
552        return 0;
553    }
554
555    inBytes=(const uint8_t *)inData+headerSize;
556    outBytes=(uint8_t *)outData+headerSize;
557
558    inIndexes=(const int32_t *)inBytes;
559
560    if(length>=0) {
561        length-=headerSize;
562        if(length<32*4) {
563            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
564                             length);
565            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
566            return 0;
567        }
568    }
569
570    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
571    for(i=0; i<32; ++i) {
572        indexes[i]=udata_readInt32(ds, inIndexes[i]);
573    }
574
575    /* calculate the total length of the data */
576    size=
577        32*4+ /* size of indexes[] */
578        indexes[_NORM_INDEX_TRIE_SIZE]+
579        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
580        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
581        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
582        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
583        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
584
585    if(length>=0) {
586        if(length<size) {
587            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
588                             length);
589            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
590            return 0;
591        }
592
593        /* copy the data for inaccessible bytes */
594        if(inBytes!=outBytes) {
595            uprv_memcpy(outBytes, inBytes, size);
596        }
597
598        offset=0;
599
600        /* swap the indexes[] */
601        count=32*4;
602        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
603        offset+=count;
604
605        /* swap the main UTrie */
606        count=indexes[_NORM_INDEX_TRIE_SIZE];
607        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
608        offset+=count;
609
610        /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
611        count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
612        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
613        offset+=count;
614
615        /* swap the FCD UTrie */
616        count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
617        if(count!=0) {
618            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
619            offset+=count;
620        }
621
622        /* swap the aux UTrie */
623        count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
624        if(count!=0) {
625            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
626            offset+=count;
627        }
628
629        /* swap the uint16_t combiningTable[] */
630        count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
631        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
632        offset+=count;
633    }
634
635    return headerSize+size;
636}
637
638#endif
639
640/* Swap 'Test' data from gentest */
641static int32_t U_CALLCONV
642test_swap(const UDataSwapper *ds,
643           const void *inData, int32_t length, void *outData,
644           UErrorCode *pErrorCode) {
645    const UDataInfo *pInfo;
646    int32_t headerSize;
647
648    const uint8_t *inBytes;
649    uint8_t *outBytes;
650
651    int32_t offset;
652
653    /* udata_swapDataHeader checks the arguments */
654    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
655    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
656        udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
657        return 0;
658    }
659
660    /* check data format and format version */
661    pInfo=(const UDataInfo *)((const char *)inData+4);
662    if(!(
663        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
664        pInfo->dataFormat[1]==0x65 &&
665        pInfo->dataFormat[2]==0x73 &&
666        pInfo->dataFormat[3]==0x74 &&
667        pInfo->formatVersion[0]==1
668    )) {
669        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
670                         pInfo->dataFormat[0], pInfo->dataFormat[1],
671                         pInfo->dataFormat[2], pInfo->dataFormat[3],
672                         pInfo->formatVersion[0]);
673        *pErrorCode=U_UNSUPPORTED_ERROR;
674        return 0;
675    }
676
677    inBytes=(const uint8_t *)inData+headerSize;
678    outBytes=(uint8_t *)outData+headerSize;
679
680    int32_t size16 = 2; // 16bit plus padding
681    int32_t sizeStr = 5; // 4 char inv-str plus null
682    int32_t size = size16 + sizeStr;
683
684    if(length>=0) {
685        if(length<size) {
686            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
687                             length, size);
688            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
689            return 0;
690        }
691
692	offset =0;
693	/* swap a 1 entry array */
694        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
695	offset+=size16;
696	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
697    }
698
699    return headerSize+size;
700}
701
702/* swap any data (except a .dat package) ------------------------------------ */
703
704static const struct {
705    uint8_t dataFormat[4];
706    UDataSwapFn *swapFn;
707} swapFns[]={
708    { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
709#if !UCONFIG_NO_LEGACY_CONVERSION
710    { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
711#endif
712#if !UCONFIG_NO_CONVERSION
713    { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
714#endif
715#if !UCONFIG_NO_IDNA
716    { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
717#endif
718    /* insert data formats here, descending by expected frequency of occurrence */
719    { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
720
721    { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
722                                  ucase_swap },         /* dataFormat="cAsE" */
723
724    { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
725                                  ubidi_swap },         /* dataFormat="BiDi" */
726
727#if !UCONFIG_NO_NORMALIZATION
728    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
729    { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
730#endif
731#if !UCONFIG_NO_COLLATION
732    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
733    { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
734#endif
735#if !UCONFIG_NO_BREAK_ITERATION
736    { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
737    { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
738#endif
739    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
740    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
741#if !UCONFIG_NO_NORMALIZATION
742    { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
743#endif
744    { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
745};
746
747U_CAPI int32_t U_EXPORT2
748udata_swap(const UDataSwapper *ds,
749           const void *inData, int32_t length, void *outData,
750           UErrorCode *pErrorCode) {
751    char dataFormatChars[4];
752    const UDataInfo *pInfo;
753    int32_t i, swappedLength;
754
755    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
756        return 0;
757    }
758
759    /*
760     * Preflight the header first; checks for illegal arguments, too.
761     * Do not swap the header right away because the format-specific swapper
762     * will swap it, get the headerSize again, and also use the header
763     * information. Otherwise we would have to pass some of the information
764     * and not be able to use the UDataSwapFn signature.
765     */
766    udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
767
768    /*
769     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
770     * then we could check here for further known magic values and structures.
771     */
772    if(U_FAILURE(*pErrorCode)) {
773        return 0; /* the data format was not recognized */
774    }
775
776    pInfo=(const UDataInfo *)((const char *)inData+4);
777
778    {
779        /* convert the data format from ASCII to Unicode to the system charset */
780        UChar u[4]={
781             pInfo->dataFormat[0], pInfo->dataFormat[1],
782             pInfo->dataFormat[2], pInfo->dataFormat[3]
783        };
784
785        if(uprv_isInvariantUString(u, 4)) {
786            u_UCharsToChars(u, dataFormatChars, 4);
787        } else {
788            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
789        }
790    }
791
792    /* dispatch to the swap function for the dataFormat */
793    for(i=0; i<LENGTHOF(swapFns); ++i) {
794        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
795            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
796
797            if(U_FAILURE(*pErrorCode)) {
798                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
799                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
800                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
801                                 dataFormatChars[0], dataFormatChars[1],
802                                 dataFormatChars[2], dataFormatChars[3],
803                                 u_errorName(*pErrorCode));
804            } else if(swappedLength<(length-15)) {
805                /* swapped less than expected */
806                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
807                                 swappedLength, length,
808                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
809                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
810                                 dataFormatChars[0], dataFormatChars[1],
811                                 dataFormatChars[2], dataFormatChars[3],
812                                 u_errorName(*pErrorCode));
813            }
814
815            return swappedLength;
816        }
817    }
818
819    /* the dataFormat was not recognized */
820    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
821                     pInfo->dataFormat[0], pInfo->dataFormat[1],
822                     pInfo->dataFormat[2], pInfo->dataFormat[3],
823                     dataFormatChars[0], dataFormatChars[1],
824                     dataFormatChars[2], dataFormatChars[3]);
825
826    *pErrorCode=U_UNSUPPORTED_ERROR;
827    return 0;
828}
829