1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2008, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*
10*  ucnv_io.c:
11*  initializes global variables and defines functions pertaining to converter
12*  name resolution aspect of the conversion code.
13*
14*   new implementation:
15*
16*   created on: 1999nov22
17*   created by: Markus W. Scherer
18*
19*   Use the binary cnvalias.icu (created from convrtrs.txt) to work
20*   with aliases for converter names.
21*
22*   Date        Name        Description
23*   11/22/1999  markus      Created
24*   06/28/2002  grhoten     Major overhaul of the converter alias design.
25*                           Now an alias can map to different converters
26*                           depending on the specified standard.
27*******************************************************************************
28*/
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_CONVERSION
33
34#include "unicode/ucnv.h"
35#include "unicode/udata.h"
36
37#include "umutex.h"
38#include "uarrsort.h"
39#include "udataswp.h"
40#include "cstring.h"
41#include "cmemory.h"
42#include "ucnv_io.h"
43#include "uenumimp.h"
44#include "ucln_cmn.h"
45
46/* Format of cnvalias.icu -----------------------------------------------------
47 *
48 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49 * This binary form contains several tables. All indexes are to uint16_t
50 * units, and not to the bytes (uint8_t units). Addressing everything on
51 * 16-bit boundaries allows us to store more information with small index
52 * numbers, which are also 16-bit in size. The majority of the table (except
53 * the string table) are 16-bit numbers.
54 *
55 * First there is the size of the Table of Contents (TOC). The TOC
 * entries contain the size of each section. To find the offset of a
 * section, sum up the sizes of all the sections that precede it.
58 * The TOC length and entries are an array of uint32_t values.
59 * The first section after the TOC starts immediately after the TOC.
60 *
61 * 1) This section contains a list of converters. This list contains indexes
62 * into the string table for the converter name. The index of this list is
63 * also used by other sections, which are mentioned later on.
64 * This list is not sorted.
65 *
66 * 2) This section contains a list of tags. This list contains indexes
67 * into the string table for the tag name. The index of this list is
68 * also used by other sections, which are mentioned later on.
69 * This list is in priority order of standards.
70 *
71 * 3) This section contains a list of sorted unique aliases. This
72 * list contains indexes into the string table for the alias name. The
73 * index of this list is also used by other sections, like the 4th section.
74 * The index for the 3rd and 4th section is used to get the
75 * alias -> converter name mapping. Section 3 and 4 form a two column table.
76 * Some of the most significant bits of each index may contain other
77 * information (see findConverter for details).
78 *
79 * 4) This section contains a list of mapped converter names. Consider this
80 * as a table that maps the 3rd section to the 1st section. This list contains
81 * indexes into the 1st section. The index of this list is the same index in
82 * the 3rd section. There is also some extra information in the high bits of
83 * each converter index in this table. Currently it's only used to say that
84 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86 * the predigested form of the 5th section so that an alias lookup can be fast.
87 *
88 * 5) This section contains a 2D array with indexes to the 6th section. This
89 * section is the full form of all alias mappings. The column index is the
90 * index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
92 * third dimension is in the 6th section.
93 *
 * 6) This is a blob of variable-length arrays. Each array starts with a size,
95 * and is followed by indexes to alias names in the string table. This is
96 * the third dimension to the section 5. No other section should be referencing
97 * this section.
98 *
99 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101 * what type of string normalization is used among other potential things in the
102 * future.
103 *
104 * 8) This is the string table. All strings are indexed on an even address.
105 * There are two reasons for this. First many chip architectures locate strings
106 * faster on even address boundaries. Second, since all indexes are 16-bit
107 * numbers, this string table can be 128KB in size instead of 64KB when we
108 * only have strings starting on an even address.
109 *
110 * 9) When present this is a set of prenormalized strings from section 8. This
111 * table contains normalized strings with the dashes and spaces stripped out,
112 * and all strings lowercased. In the future, the options in section 7 may state
113 * other types of normalization.
114 *
115 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
116 * has a unique alias among all converters. That same alias can
117 * be mentioned in other standards on different converters,
118 * but only one alias per tag can be unique.
119 *
120 *
121 *              Converter Names (Usually in TR22 form)
122 *           -------------------------------------------.
123 *     T    /                                          /|
124 *     a   /                                          / |
125 *     g  /                                          /  |
126 *     s /                                          /   |
127 *      /                                          /    |
128 *      ------------------------------------------/     |
129 *    A |                                         |     |
130 *    l |                                         |     |
131 *    i |                                         |    /
132 *    a |                                         |   /
133 *    s |                                         |  /
134 *    e |                                         | /
135 *    s |                                         |/
136 *      -------------------------------------------
137 *
138 *
139 *
140 * Here is what it really looks like. It's like swiss cheese.
141 * There are holes. Some converters aren't recognized by
142 * a standard, or they are really old converters that the
143 * standard doesn't recognize anymore.
144 *
145 *              Converter Names (Usually in TR22 form)
146 *           -------------------------------------------.
147 *     T    /##########################################/|
148 *     a   /     #            #                       /#
149 *     g  /  #      ##     ##     ### # ### ### ### #/
150 *     s / #             #####  ####        ##  ## #/#
151 *      / ### # # ##  #  #   #          ### # #   #/##
152 *      ------------------------------------------/# #
153 *    A |### # # ##  #  #   #          ### # #   #|# #
154 *    l |# # #    #     #               ## #     #|# #
155 *    i |# # #    #     #                #       #|#
156 *    a |#                                       #|#
157 *    s |                                        #|#
158 *    e
159 *    s
160 *
161 */
162
/**
 * Used by the UEnumeration API.
 * Per-enumeration state for walking one list of standard aliases.
 */
typedef struct UAliasContext {
    uint32_t listOffset; /* offset of the list in gMainTable.taggedAliasLists; 0 means there is nothing to enumerate */
    uint32_t listIdx;    /* index of the next alias to return from that list */
} UAliasContext;
170
/* Item name and type passed to udata_openChoice() when the alias data is loaded. */
static const char DATA_NAME[] = "cnvalias";
static const char DATA_TYPE[] = "icu";

/* Handle of the loaded alias data; NULL until haveAliasData() has set it, reset to NULL by ucnv_io_cleanup(). */
static UDataMemory *gAliasData=NULL;
175
/*
 * Slot numbers in the uint32_t table of contents (TOC) at the start of the data.
 * Slot 0 holds the TOC length; slots 1..9 hold the sizes of sections 1..9
 * described in the format comment above.
 */
enum {
    tocLengthIndex=0,
    converterListIndex=1,
    tagListIndex=2,
    aliasListIndex=3,
    untaggedConvArrayIndex=4,
    taggedAliasArrayIndex=5,
    taggedAliasListsIndex=6,
    tableOptionsIndex=7,
    stringTableIndex=8,
    normalizedStringTableIndex=9,   /* only read by haveAliasData() when the TOC length is greater than 8 */
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
190
/*
 * Options used by haveAliasData() when the data has no options section, or
 * when its normalization type is not one this code knows about.
 */
static const UConverterAliasOptions defaultTableOptions = {
    UCNV_IO_UNNORMALIZED,
    0 /* containsCnvOptionInfo */
};
/* Section sizes and section pointers of the loaded data; filled in by haveAliasData(), zeroed by ucnv_io_cleanup(). */
static UConverterAlias gMainTable;
196
/*
 * Turn a string-table index into a C string pointer.
 * The index is added to the table pointer before the cast to char *, so it
 * counts table units (haveAliasData() sets the tables from a uint16_t pointer),
 * not bytes.
 */
#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
199
200static UBool U_CALLCONV
201isAcceptable(void *context,
202             const char *type, const char *name,
203             const UDataInfo *pInfo) {
204    return (UBool)(
205        pInfo->size>=20 &&
206        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
207        pInfo->charsetFamily==U_CHARSET_FAMILY &&
208        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
209        pInfo->dataFormat[1]==0x76 &&
210        pInfo->dataFormat[2]==0x41 &&
211        pInfo->dataFormat[3]==0x6c &&
212        pInfo->formatVersion[0]==3);
213}
214
215static UBool U_CALLCONV ucnv_io_cleanup(void)
216{
217    if (gAliasData) {
218        udata_close(gAliasData);
219        gAliasData = NULL;
220    }
221
222    uprv_memset(&gMainTable, 0, sizeof(gMainTable));
223
224    return TRUE;                   /* Everything was cleaned up */
225}
226
227static UBool
228haveAliasData(UErrorCode *pErrorCode) {
229    int needInit;
230
231    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
232        return FALSE;
233    }
234
235    UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
236
237    /* load converter alias data from file if necessary */
238    if (needInit) {
239        UDataMemory *data;
240        const uint16_t *table;
241        const uint32_t *sectionSizes;
242        uint32_t tableStart;
243        uint32_t currOffset;
244
245        data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
246        if(U_FAILURE(*pErrorCode)) {
247            return FALSE;
248        }
249
250        sectionSizes = (const uint32_t *)udata_getMemory(data);
251        table = (const uint16_t *)sectionSizes;
252
253        tableStart      = sectionSizes[0];
254        if (tableStart < minTocLength) {
255            *pErrorCode = U_INVALID_FORMAT_ERROR;
256            udata_close(data);
257            return FALSE;
258        }
259
260        umtx_lock(NULL);
261        if(gAliasData==NULL) {
262            gMainTable.converterListSize      = sectionSizes[1];
263            gMainTable.tagListSize            = sectionSizes[2];
264            gMainTable.aliasListSize          = sectionSizes[3];
265            gMainTable.untaggedConvArraySize  = sectionSizes[4];
266            gMainTable.taggedAliasArraySize   = sectionSizes[5];
267            gMainTable.taggedAliasListsSize   = sectionSizes[6];
268            gMainTable.optionTableSize        = sectionSizes[7];
269            gMainTable.stringTableSize        = sectionSizes[8];
270
271            if (tableStart > 8) {
272                gMainTable.normalizedStringTableSize = sectionSizes[9];
273            }
274
275            currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
276            gMainTable.converterList = table + currOffset;
277
278            currOffset += gMainTable.converterListSize;
279            gMainTable.tagList = table + currOffset;
280
281            currOffset += gMainTable.tagListSize;
282            gMainTable.aliasList = table + currOffset;
283
284            currOffset += gMainTable.aliasListSize;
285            gMainTable.untaggedConvArray = table + currOffset;
286
287            currOffset += gMainTable.untaggedConvArraySize;
288            gMainTable.taggedAliasArray = table + currOffset;
289
290            /* aliasLists is a 1's based array, but it has a padding character */
291            currOffset += gMainTable.taggedAliasArraySize;
292            gMainTable.taggedAliasLists = table + currOffset;
293
294            currOffset += gMainTable.taggedAliasListsSize;
295            if (gMainTable.optionTableSize > 0
296                && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
297            {
298                /* Faster table */
299                gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
300            }
301            else {
302                /* Smaller table, or I can't handle this normalization mode!
303                Use the original slower table lookup. */
304                gMainTable.optionTable = &defaultTableOptions;
305            }
306
307            currOffset += gMainTable.optionTableSize;
308            gMainTable.stringTable = table + currOffset;
309
310            currOffset += gMainTable.stringTableSize;
311            gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
312                ? gMainTable.stringTable : (table + currOffset));
313
314            ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
315
316            gAliasData = data;
317            data=NULL;
318        }
319        umtx_unlock(NULL);
320
321        /* if a different thread set it first, then close the extra data */
322        if(data!=NULL) {
323            udata_close(data); /* NULL if it was set correctly */
324        }
325    }
326
327    return TRUE;
328}
329
330static U_INLINE UBool
331isAlias(const char *alias, UErrorCode *pErrorCode) {
332    if(alias==NULL) {
333        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
334        return FALSE;
335    }
336    return (UBool)(*alias!=0);
337}
338
339static uint32_t getTagNumber(const char *tagname) {
340    if (gMainTable.tagList) {
341        uint32_t tagNum;
342        for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
343            if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
344                return tagNum;
345            }
346        }
347    }
348
349    return UINT32_MAX;
350}
351
/* character types relevant for ucnv_compareNames() */
enum {
    IGNORE,     /* not a letter or digit: skipped by the comparison */
    ZERO,       /* the digit 0 */
    NONZERO,    /* the digits 1..9 */
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * A plain 0 entry is IGNORE. Entries for letters hold the lowercase letter
 * itself, so both 'A' and 'a' map to 0x61.
 */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Character type of an ASCII-family char. Values that are negative as int8_t
 * (bytes 80..FF) are outside the table and are reported as IGNORE.
 * The argument is parenthesized at every use so that an expression argument
 * is cast as a whole; it is still evaluated twice, so it must not have
 * side effects.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)IGNORE)
373
/*
 * character types for EBCDIC 80..FF
 * The table is indexed with the low 7 bits of the byte (see GET_EBCDIC_TYPE).
 * A plain 0 entry is IGNORE. The rows for C0..EF hold the same values as the
 * rows for 80..AF, so both ranges map to the same letter codes; the last row
 * (F0..F9) holds the digit types.
 */
static const uint8_t ebcdicTypes[128] = {
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/*
 * Character type of an EBCDIC-family char. Only values that are negative as
 * int8_t (bytes 80..FF) are looked up; everything else is IGNORE.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
387
/*
 * GET_CHAR_TYPE classifies a char in the charset family this library is
 * compiled for; it is what ucnv_compareNames() uses.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
395
396/* @see ucnv_compareNames */
397U_CFUNC char * U_EXPORT2
398ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
399    char *dstItr = dst;
400    uint8_t type, nextType;
401    char c1;
402    UBool afterDigit = FALSE;
403
404    while ((c1 = *name++) != 0) {
405        type = GET_ASCII_TYPE(c1);
406        switch (type) {
407        case IGNORE:
408            afterDigit = FALSE;
409            continue; /* ignore all but letters and digits */
410        case ZERO:
411            if (!afterDigit) {
412                nextType = GET_ASCII_TYPE(*name);
413                if (nextType == ZERO || nextType == NONZERO) {
414                    continue; /* ignore leading zero before another digit */
415                }
416            }
417            break;
418        case NONZERO:
419            afterDigit = TRUE;
420            break;
421        default:
422            c1 = (char)type; /* lowercased letter */
423            afterDigit = FALSE;
424            break;
425        }
426        *dstItr++ = c1;
427    }
428    *dstItr = 0;
429    return dst;
430}
431
432U_CFUNC char * U_EXPORT2
433ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
434    char *dstItr = dst;
435    uint8_t type, nextType;
436    char c1;
437    UBool afterDigit = FALSE;
438
439    while ((c1 = *name++) != 0) {
440        type = GET_EBCDIC_TYPE(c1);
441        switch (type) {
442        case IGNORE:
443            afterDigit = FALSE;
444            continue; /* ignore all but letters and digits */
445        case ZERO:
446            if (!afterDigit) {
447                nextType = GET_EBCDIC_TYPE(*name);
448                if (nextType == ZERO || nextType == NONZERO) {
449                    continue; /* ignore leading zero before another digit */
450                }
451            }
452            break;
453        case NONZERO:
454            afterDigit = TRUE;
455            break;
456        default:
457            c1 = (char)type; /* lowercased letter */
458            afterDigit = FALSE;
459            break;
460        }
461        *dstItr++ = c1;
462    }
463    *dstItr = 0;
464    return dst;
465}
466
467/**
468 * Do a fuzzy compare of two converter/alias names.
469 * The comparison is case-insensitive, ignores leading zeroes if they are not
470 * followed by further digits, and ignores all but letters and digits.
471 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
472 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
473 * at http://www.unicode.org/reports/tr22/
474 *
475 * This is a symmetrical (commutative) operation; order of arguments
476 * is insignificant.  This is an important property for sorting the
477 * list (when the list is preprocessed into binary form) and for
478 * performing binary searches on it at run time.
479 *
480 * @param name1 a converter name or alias, zero-terminated
481 * @param name2 a converter name or alias, zero-terminated
482 * @return 0 if the names match, or a negative value if the name1
483 * lexically precedes name2, or a positive value if the name1
484 * lexically follows name2.
485 *
486 * @see ucnv_io_stripForCompare
487 */
488U_CAPI int U_EXPORT2
489ucnv_compareNames(const char *name1, const char *name2) {
490    int rc;
491    uint8_t type, nextType;
492    char c1, c2;
493    UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
494
495    for (;;) {
496        while ((c1 = *name1++) != 0) {
497            type = GET_CHAR_TYPE(c1);
498            switch (type) {
499            case IGNORE:
500                afterDigit1 = FALSE;
501                continue; /* ignore all but letters and digits */
502            case ZERO:
503                if (!afterDigit1) {
504                    nextType = GET_CHAR_TYPE(*name1);
505                    if (nextType == ZERO || nextType == NONZERO) {
506                        continue; /* ignore leading zero before another digit */
507                    }
508                }
509                break;
510            case NONZERO:
511                afterDigit1 = TRUE;
512                break;
513            default:
514                c1 = (char)type; /* lowercased letter */
515                afterDigit1 = FALSE;
516                break;
517            }
518            break; /* deliver c1 */
519        }
520        while ((c2 = *name2++) != 0) {
521            type = GET_CHAR_TYPE(c2);
522            switch (type) {
523            case IGNORE:
524                afterDigit2 = FALSE;
525                continue; /* ignore all but letters and digits */
526            case ZERO:
527                if (!afterDigit2) {
528                    nextType = GET_CHAR_TYPE(*name2);
529                    if (nextType == ZERO || nextType == NONZERO) {
530                        continue; /* ignore leading zero before another digit */
531                    }
532                }
533                break;
534            case NONZERO:
535                afterDigit2 = TRUE;
536                break;
537            default:
538                c2 = (char)type; /* lowercased letter */
539                afterDigit2 = FALSE;
540                break;
541            }
542            break; /* deliver c2 */
543        }
544
545        /* If we reach the ends of both strings then they match */
546        if ((c1|c2)==0) {
547            return 0;
548        }
549
550        /* Case-insensitive comparison */
551        rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
552        if (rc != 0) {
553            return rc;
554        }
555    }
556}
557
558/*
559 * search for an alias
560 * return the converter number index for gConverterList
561 */
562static U_INLINE uint32_t
563findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
564    uint32_t mid, start, limit;
565    uint32_t lastMid;
566    int result;
567    int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
568    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
569
570    if (!isUnnormalized) {
571        if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
572            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
573            return UINT32_MAX;
574        }
575
576        /* Lower case and remove ignoreable characters. */
577        ucnv_io_stripForCompare(strippedName, alias);
578        alias = strippedName;
579    }
580
581    /* do a binary search for the alias */
582    start = 0;
583    limit = gMainTable.untaggedConvArraySize;
584    mid = limit;
585    lastMid = UINT32_MAX;
586
587    for (;;) {
588        mid = (uint32_t)((start + limit) / 2);
589        if (lastMid == mid) {   /* Have we moved? */
590            break;  /* We haven't moved, and it wasn't found. */
591        }
592        lastMid = mid;
593        if (isUnnormalized) {
594            result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
595        }
596        else {
597            result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
598        }
599
600        if (result < 0) {
601            limit = mid;
602        } else if (result > 0) {
603            start = mid;
604        } else {
605            /* Since the gencnval tool folds duplicates into one entry,
606             * this alias in gAliasList is unique, but different standards
607             * may map an alias to different converters.
608             */
609            if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
610                *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
611            }
612            /* State whether the canonical converter name contains an option.
613            This information is contained in this list in order to maintain backward & forward compatibility. */
614            if (containsOption) {
615                UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
616                *containsOption = (UBool)((containsCnvOptionInfo
617                    && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
618                    || !containsCnvOptionInfo);
619            }
620            return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
621        }
622    }
623
624    return UINT32_MAX;
625}
626
627/*
628 * Is this alias in this list?
629 * alias and listOffset should be non-NULL.
630 */
631static U_INLINE UBool
632isAliasInList(const char *alias, uint32_t listOffset) {
633    if (listOffset) {
634        uint32_t currAlias;
635        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
636        /* +1 to skip listCount */
637        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
638        for (currAlias = 0; currAlias < listCount; currAlias++) {
639            if (currList[currAlias]
640                && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
641            {
642                return TRUE;
643            }
644        }
645    }
646    return FALSE;
647}
648
649/*
650 * Search for an standard name of an alias (what is the default name
651 * that this standard uses?)
652 * return the listOffset for gTaggedAliasLists. If it's 0,
653 * the it couldn't be found, but the parameters are valid.
654 */
655static uint32_t
656findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
657    uint32_t idx;
658    uint32_t listOffset;
659    uint32_t convNum;
660    UErrorCode myErr = U_ZERO_ERROR;
661    uint32_t tagNum = getTagNumber(standard);
662
663    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
664    convNum = findConverter(alias, NULL, &myErr);
665    if (myErr != U_ZERO_ERROR) {
666        *pErrorCode = myErr;
667    }
668
669    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
670        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
671        if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
672            return listOffset;
673        }
674        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
675            /* Uh Oh! They used an ambiguous alias.
676               We have to search the whole swiss cheese starting
677               at the highest standard affinity.
678               This may take a while.
679            */
680            for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
681                listOffset = gMainTable.taggedAliasArray[idx];
682                if (listOffset && isAliasInList(alias, listOffset)) {
683                    uint32_t currTagNum = idx/gMainTable.converterListSize;
684                    uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
685                    uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
686                    if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
687                        return tempListOffset;
688                    }
689                    /* else keep on looking */
690                    /* We could speed this up by starting on the next row
691                       because an alias is unique per row, right now.
692                       This would change if alias versioning appears. */
693                }
694            }
695            /* The standard doesn't know about the alias */
696        }
697        /* else no default name */
698        return 0;
699    }
700    /* else converter or tag not found */
701
702    return UINT32_MAX;
703}
704
705/* Return the canonical name */
706static uint32_t
707findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
708    uint32_t idx;
709    uint32_t listOffset;
710    uint32_t convNum;
711    UErrorCode myErr = U_ZERO_ERROR;
712    uint32_t tagNum = getTagNumber(standard);
713
714    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
715    convNum = findConverter(alias, NULL, &myErr);
716    if (myErr != U_ZERO_ERROR) {
717        *pErrorCode = myErr;
718    }
719
720    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
721        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
722        if (listOffset && isAliasInList(alias, listOffset)) {
723            return convNum;
724        }
725        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
726            /* Uh Oh! They used an ambiguous alias.
727               We have to search one slice of the swiss cheese.
728               We search only in the requested tag, not the whole thing.
729               This may take a while.
730            */
731            uint32_t convStart = (tagNum)*gMainTable.converterListSize;
732            uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
733            for (idx = convStart; idx < convLimit; idx++) {
734                listOffset = gMainTable.taggedAliasArray[idx];
735                if (listOffset && isAliasInList(alias, listOffset)) {
736                    return idx-convStart;
737                }
738            }
739            /* The standard doesn't know about the alias */
740        }
741        /* else no canonical name */
742    }
743    /* else converter or tag not found */
744
745    return UINT32_MAX;
746}
747
748
749
750U_CFUNC const char *
751ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
752    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
753        uint32_t convNum = findConverter(alias, containsOption, pErrorCode);
754        if (convNum < gMainTable.converterListSize) {
755            return GET_STRING(gMainTable.converterList[convNum]);
756        }
757        /* else converter not found */
758    }
759    return NULL;
760}
761
762static int32_t U_CALLCONV
763ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
764    int32_t value = 0;
765    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
766    uint32_t listOffset = myContext->listOffset;
767
768    if (listOffset) {
769        value = gMainTable.taggedAliasLists[listOffset];
770    }
771    return value;
772}
773
774static const char* U_CALLCONV
775ucnv_io_nextStandardAliases(UEnumeration *enumerator,
776                            int32_t* resultLength,
777                            UErrorCode *pErrorCode)
778{
779    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
780    uint32_t listOffset = myContext->listOffset;
781
782    if (listOffset) {
783        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
784        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
785
786        if (myContext->listIdx < listCount) {
787            const char *myStr = GET_STRING(currList[myContext->listIdx++]);
788            if (resultLength) {
789                *resultLength = (int32_t)uprv_strlen(myStr);
790            }
791            return myStr;
792        }
793    }
794    /* Either we accessed a zero length list, or we enumerated too far. */
795    if (resultLength) {
796        *resultLength = 0;
797    }
798    return NULL;
799}
800
801static void U_CALLCONV
802ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
803    ((UAliasContext *)(enumerator->context))->listIdx = 0;
804}
805
806static void U_CALLCONV
807ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
808    uprv_free(enumerator->context);
809    uprv_free(enumerator);
810}
811
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a template: ucnv_openStandardNames() copies it into a freshly
 * allocated UEnumeration and then sets that copy's context member to a
 * UAliasContext. The two leading NULL members are therefore placeholders
 * here; the remaining members are the callbacks defined above plus
 * uenum_unextDefault.
 */
static const UEnumeration gEnumAliases = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countStandardAliases,
    uenum_unextDefault,
    ucnv_io_nextStandardAliases,
    ucnv_io_resetStandardAliases
};
822
823U_CAPI UEnumeration * U_EXPORT2
824ucnv_openStandardNames(const char *convName,
825                       const char *standard,
826                       UErrorCode *pErrorCode)
827{
828    UEnumeration *myEnum = NULL;
829    if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
830        uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
831
832        /* When listOffset == 0, we want to acknowledge that the
833           converter name and standard are okay, but there
834           is nothing to enumerate. */
835        if (listOffset < gMainTable.taggedAliasListsSize) {
836            UAliasContext *myContext;
837
838            myEnum = uprv_malloc(sizeof(UEnumeration));
839            if (myEnum == NULL) {
840                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
841                return NULL;
842            }
843            uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
844            myContext = uprv_malloc(sizeof(UAliasContext));
845            if (myContext == NULL) {
846                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
847                uprv_free(myEnum);
848                return NULL;
849            }
850            myContext->listOffset = listOffset;
851            myContext->listIdx = 0;
852            myEnum->context = myContext;
853        }
854        /* else converter or tag not found */
855    }
856    return myEnum;
857}
858
859static uint16_t
860ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
861    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
862        uint32_t convNum = findConverter(alias, NULL, pErrorCode);
863        if (convNum < gMainTable.converterListSize) {
864            /* tagListNum - 1 is the ALL tag */
865            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
866
867            if (listOffset) {
868                return gMainTable.taggedAliasLists[listOffset];
869            }
870            /* else this shouldn't happen. internal program error */
871        }
872        /* else converter not found */
873    }
874    return 0;
875}
876
877static uint16_t
878ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
879    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
880        uint32_t currAlias;
881        uint32_t convNum = findConverter(alias, NULL, pErrorCode);
882        if (convNum < gMainTable.converterListSize) {
883            /* tagListNum - 1 is the ALL tag */
884            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
885
886            if (listOffset) {
887                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
888                /* +1 to skip listCount */
889                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
890
891                for (currAlias = start; currAlias < listCount; currAlias++) {
892                    aliases[currAlias] = GET_STRING(currList[currAlias]);
893                }
894            }
895            /* else this shouldn't happen. internal program error */
896        }
897        /* else converter not found */
898    }
899    return 0;
900}
901
902static const char *
903ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
904    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
905        uint32_t convNum = findConverter(alias, NULL, pErrorCode);
906        if (convNum < gMainTable.converterListSize) {
907            /* tagListNum - 1 is the ALL tag */
908            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
909
910            if (listOffset) {
911                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
912                /* +1 to skip listCount */
913                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
914
915                if (n < listCount)  {
916                    return GET_STRING(currList[n]);
917                }
918                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
919            }
920            /* else this shouldn't happen. internal program error */
921        }
922        /* else converter not found */
923    }
924    return NULL;
925}
926
927static uint16_t
928ucnv_io_countStandards(UErrorCode *pErrorCode) {
929    if (haveAliasData(pErrorCode)) {
930        /* Don't include the empty list */
931        return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
932    }
933
934    return 0;
935}
936
937U_CAPI const char * U_EXPORT2
938ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
939    if (haveAliasData(pErrorCode)) {
940        if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
941            return GET_STRING(gMainTable.tagList[n]);
942        }
943        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
944    }
945
946    return NULL;
947}
948
949U_CAPI const char * U_EXPORT2
950ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
951    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
952        uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
953
954        if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
955            const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
956
957            /* Get the preferred name from this list */
958            if (currList[0]) {
959                return GET_STRING(currList[0]);
960            }
961            /* else someone screwed up the alias table. */
962            /* *pErrorCode = U_INVALID_FORMAT_ERROR */
963        }
964    }
965
966    return NULL;
967}
968
969U_CAPI uint16_t U_EXPORT2
970ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
971{
972    return ucnv_io_countAliases(alias, pErrorCode);
973}
974
975
976U_CAPI const char* U_EXPORT2
977ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
978{
979    return ucnv_io_getAlias(alias, n, pErrorCode);
980}
981
982U_CAPI void U_EXPORT2
983ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
984{
985    ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
986}
987
988U_CAPI uint16_t U_EXPORT2
989ucnv_countStandards(void)
990{
991    UErrorCode err = U_ZERO_ERROR;
992    return ucnv_io_countStandards(&err);
993}
994
995U_CAPI const char * U_EXPORT2
996ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
997    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
998        uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
999
1000        if (convNum < gMainTable.converterListSize) {
1001            return GET_STRING(gMainTable.converterList[convNum]);
1002        }
1003    }
1004
1005    return NULL;
1006}
1007
1008static int32_t U_CALLCONV
1009ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1010    return gMainTable.converterListSize;
1011}
1012
1013static const char* U_CALLCONV
1014ucnv_io_nextAllConverters(UEnumeration *enumerator,
1015                            int32_t* resultLength,
1016                            UErrorCode *pErrorCode)
1017{
1018    uint16_t *myContext = (uint16_t *)(enumerator->context);
1019
1020    if (*myContext < gMainTable.converterListSize) {
1021        const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1022        if (resultLength) {
1023            *resultLength = (int32_t)uprv_strlen(myStr);
1024        }
1025        return myStr;
1026    }
1027    /* Either we accessed a zero length list, or we enumerated too far. */
1028    if (resultLength) {
1029        *resultLength = 0;
1030    }
1031    return NULL;
1032}
1033
1034static void U_CALLCONV
1035ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1036    *((uint16_t *)(enumerator->context)) = 0;
1037}
1038
/*
 * Enumerate the names of all converters in the converter list.
 *
 * This is a template: ucnv_openAllNames() copies it into a freshly
 * allocated UEnumeration and then sets that copy's context member to a
 * uint16_t index. The two leading NULL members are therefore placeholders
 * here; the close callback is shared with gEnumAliases.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countAllConverters,
    uenum_unextDefault,
    ucnv_io_nextAllConverters,
    ucnv_io_resetAllConverters
};
1048
1049U_CAPI UEnumeration * U_EXPORT2
1050ucnv_openAllNames(UErrorCode *pErrorCode) {
1051    UEnumeration *myEnum = NULL;
1052    if (haveAliasData(pErrorCode)) {
1053        uint16_t *myContext;
1054
1055        myEnum = uprv_malloc(sizeof(UEnumeration));
1056        if (myEnum == NULL) {
1057            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1058            return NULL;
1059        }
1060        uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1061        myContext = uprv_malloc(sizeof(uint16_t));
1062        if (myContext == NULL) {
1063            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1064            uprv_free(myEnum);
1065            return NULL;
1066        }
1067        *myContext = 0;
1068        myEnum->context = myContext;
1069    }
1070    return myEnum;
1071}
1072
1073U_CFUNC uint16_t
1074ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1075    if (haveAliasData(pErrorCode)) {
1076        return (uint16_t)gMainTable.converterListSize;
1077    }
1078    return 0;
1079}
1080
1081/* alias table swapping ----------------------------------------------------- */
1082
/*
 * Signature of the routine that reduces a name to its comparison form:
 * it receives a destination buffer and the name, and returns a string that
 * io_compareRows() passes to uprv_strcmp(). ucnv_swapAliases() selects
 * ucnv_io_stripASCIIForCompare or ucnv_io_stripEBCDICForCompare for it.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1084
/*
 * row of a temporary array
 *
 * gets platform-endian charset string indexes and sorting indexes;
 * after sorting this array by strings, the actual arrays are permuted
 * according to the sorting indexes
 */
typedef struct TempRow {
    /*
     * strIndex: string index read from the alias list, in 16-bit units
     *           (io_compareRows() addresses the string at chars+2*strIndex)
     * sortIndex: position of the row before sorting
     */
    uint16_t strIndex, sortIndex;
} TempRow;
1095
/*
 * Working state for re-sorting the alias list in ucnv_swapAliases();
 * it is also the context handed to io_compareRows().
 */
typedef struct TempAliasTable {
    const char *chars;                  /* start of the (already swapped) output string table */
    TempRow *rows;                      /* one row per alias; on the stack or heap-allocated */
    uint16_t *resort;                   /* scratch array used when permuting in place */
    StripForCompareFn *stripForCompare; /* name stripping for the output charset family */
} TempAliasTable;
1102
/*
 * Number of rows that ucnv_swapAliases() keeps in on-stack arrays;
 * larger alias lists get a heap-allocated temporary table instead.
 */
enum {
    STACK_ROW_CAPACITY=500
};
1106
1107static int32_t
1108io_compareRows(const void *context, const void *left, const void *right) {
1109    char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1110         strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1111
1112    TempAliasTable *tempTable=(TempAliasTable *)context;
1113    const char *chars=tempTable->chars;
1114
1115    return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1116                                tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1117}
1118
1119U_CAPI int32_t U_EXPORT2
1120ucnv_swapAliases(const UDataSwapper *ds,
1121                 const void *inData, int32_t length, void *outData,
1122                 UErrorCode *pErrorCode) {
1123    const UDataInfo *pInfo;
1124    int32_t headerSize;
1125
1126    const uint16_t *inTable;
1127    const uint32_t *inSectionSizes;
1128    uint32_t toc[offsetsCount];
1129    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1130    uint32_t i, count, tocLength, topOffset;
1131
1132    TempRow rows[STACK_ROW_CAPACITY];
1133    uint16_t resort[STACK_ROW_CAPACITY];
1134    TempAliasTable tempTable;
1135
1136    /* udata_swapDataHeader checks the arguments */
1137    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1138    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1139        return 0;
1140    }
1141
1142    /* check data format and format version */
1143    pInfo=(const UDataInfo *)((const char *)inData+4);
1144    if(!(
1145        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1146        pInfo->dataFormat[1]==0x76 &&
1147        pInfo->dataFormat[2]==0x41 &&
1148        pInfo->dataFormat[3]==0x6c &&
1149        pInfo->formatVersion[0]==3
1150    )) {
1151        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1152                         pInfo->dataFormat[0], pInfo->dataFormat[1],
1153                         pInfo->dataFormat[2], pInfo->dataFormat[3],
1154                         pInfo->formatVersion[0]);
1155        *pErrorCode=U_UNSUPPORTED_ERROR;
1156        return 0;
1157    }
1158
1159    /* an alias table must contain at least the table of contents array */
1160    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1161        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1162                         length-headerSize);
1163        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1164        return 0;
1165    }
1166
1167    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1168    inTable=(const uint16_t *)inSectionSizes;
1169    uprv_memset(toc, 0, sizeof(toc));
1170    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1171    if(tocLength<minTocLength || offsetsCount<=tocLength) {
1172        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1173        *pErrorCode=U_INVALID_FORMAT_ERROR;
1174        return 0;
1175    }
1176
1177    /* read the known part of the table of contents */
1178    for(i=converterListIndex; i<=tocLength; ++i) {
1179        toc[i]=ds->readUInt32(inSectionSizes[i]);
1180    }
1181
1182    /* compute offsets */
1183    uprv_memset(offsets, 0, sizeof(offsets));
1184    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1185    for(i=tagListIndex; i<=tocLength; ++i) {
1186        offsets[i]=offsets[i-1]+toc[i-1];
1187    }
1188
1189    /* compute the overall size of the after-header data, in numbers of 16-bit units */
1190    topOffset=offsets[i-1]+toc[i-1];
1191
1192    if(length>=0) {
1193        uint16_t *outTable;
1194        const uint16_t *p, *p2;
1195        uint16_t *q, *q2;
1196        uint16_t oldIndex;
1197
1198        if((length-headerSize)<(2*(int32_t)topOffset)) {
1199            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1200                             length-headerSize);
1201            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1202            return 0;
1203        }
1204
1205        outTable=(uint16_t *)((char *)outData+headerSize);
1206
1207        /* swap the entire table of contents */
1208        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1209
1210        /* swap unormalized strings & normalized strings */
1211        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1212                             outTable+offsets[stringTableIndex], pErrorCode);
1213        if(U_FAILURE(*pErrorCode)) {
1214            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1215            return 0;
1216        }
1217
1218        if(ds->inCharset==ds->outCharset) {
1219            /* no need to sort, just swap all 16-bit values together */
1220            ds->swapArray16(ds,
1221                            inTable+offsets[converterListIndex],
1222                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1223                            outTable+offsets[converterListIndex],
1224                            pErrorCode);
1225        } else {
1226            /* allocate the temporary table for sorting */
1227            count=toc[aliasListIndex];
1228
1229            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1230
1231            if(count<=STACK_ROW_CAPACITY) {
1232                tempTable.rows=rows;
1233                tempTable.resort=resort;
1234            } else {
1235                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1236                if(tempTable.rows==NULL) {
1237                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1238                                     count);
1239                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1240                    return 0;
1241                }
1242                tempTable.resort=(uint16_t *)(tempTable.rows+count);
1243            }
1244
1245            if(ds->outCharset==U_ASCII_FAMILY) {
1246                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1247            } else /* U_EBCDIC_FAMILY */ {
1248                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1249            }
1250
1251            /*
1252             * Sort unique aliases+mapped names.
1253             *
1254             * We need to sort the list again by outCharset strings because they
1255             * sort differently for different charset families.
1256             * First we set up a temporary table with the string indexes and
1257             * sorting indexes and sort that.
1258             * Then we permutate and copy/swap the actual values.
1259             */
1260            p=inTable+offsets[aliasListIndex];
1261            q=outTable+offsets[aliasListIndex];
1262
1263            p2=inTable+offsets[untaggedConvArrayIndex];
1264            q2=outTable+offsets[untaggedConvArrayIndex];
1265
1266            for(i=0; i<count; ++i) {
1267                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1268                tempTable.rows[i].sortIndex=(uint16_t)i;
1269            }
1270
1271            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1272                           io_compareRows, &tempTable,
1273                           FALSE, pErrorCode);
1274
1275            if(U_SUCCESS(*pErrorCode)) {
1276                /* copy/swap/permutate items */
1277                if(p!=q) {
1278                    for(i=0; i<count; ++i) {
1279                        oldIndex=tempTable.rows[i].sortIndex;
1280                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1281                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1282                    }
1283                } else {
1284                    /*
1285                     * If we swap in-place, then the permutation must use another
1286                     * temporary array (tempTable.resort)
1287                     * before the results are copied to the outBundle.
1288                     */
1289                    uint16_t *r=tempTable.resort;
1290
1291                    for(i=0; i<count; ++i) {
1292                        oldIndex=tempTable.rows[i].sortIndex;
1293                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1294                    }
1295                    uprv_memcpy(q, r, 2*count);
1296
1297                    for(i=0; i<count; ++i) {
1298                        oldIndex=tempTable.rows[i].sortIndex;
1299                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1300                    }
1301                    uprv_memcpy(q2, r, 2*count);
1302                }
1303            }
1304
1305            if(tempTable.rows!=rows) {
1306                uprv_free(tempTable.rows);
1307            }
1308
1309            if(U_FAILURE(*pErrorCode)) {
1310                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1311                                 count);
1312                return 0;
1313            }
1314
1315            /* swap remaining 16-bit values */
1316            ds->swapArray16(ds,
1317                            inTable+offsets[converterListIndex],
1318                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1319                            outTable+offsets[converterListIndex],
1320                            pErrorCode);
1321            ds->swapArray16(ds,
1322                            inTable+offsets[taggedAliasArrayIndex],
1323                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1324                            outTable+offsets[taggedAliasArrayIndex],
1325                            pErrorCode);
1326        }
1327    }
1328
1329    return headerSize+2*(int32_t)topOffset;
1330}
1331
1332#endif
1333
1334/*
1335 * Hey, Emacs, please set the following:
1336 *
1337 * Local Variables:
1338 * indent-tabs-mode: nil
1339 * End:
1340 *
1341 */
1342