1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2000-2015, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*
11* File reslist.cpp
12*
13* Modification History:
14*
15*   Date        Name        Description
16*   02/21/00    weiv        Creation.
17*******************************************************************************
18*/
19
20// Safer use of UnicodeString.
21#ifndef UNISTR_FROM_CHAR_EXPLICIT
22#   define UNISTR_FROM_CHAR_EXPLICIT explicit
23#endif
24
25// Less important, but still a good idea.
26#ifndef UNISTR_FROM_STRING_EXPLICIT
27#   define UNISTR_FROM_STRING_EXPLICIT explicit
28#endif
29
30#include <assert.h>
31#include <stdio.h>
32#include "unicode/localpointer.h"
33#include "reslist.h"
34#include "unewdata.h"
35#include "unicode/ures.h"
36#include "unicode/putil.h"
37#include "errmsg.h"
38
39#include "uarrsort.h"
40#include "uelement.h"
41#include "uhash.h"
42#include "uinvchar.h"
43#include "ustr_imp.h"
44#include "unicode/utf16.h"
45/*
46 * Align binary data at a 16-byte offset from the start of the resource bundle,
47 * to be safe for any data type it may contain.
48 */
49#define BIN_ALIGNMENT 16
50
51// This numeric constant must be at least 1.
52// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
53// and it makes no sense to move it to the pool bundle.
54// The larger the threshold for fNumUnitsSaved
55// the smaller the savings, and the smaller the pool bundle.
56// We trade some total size reduction to reduce the pool bundle a bit,
57// so that one can reasonably save data size by
58// removing bundle files without rebuilding the pool bundle.
59// This can also help to keep the pool and total (pool+local) string indexes
60// within 16 bits, that is, within range of Table16 and Array16 containers.
61#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
62#   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
63#endif
64
65U_NAMESPACE_USE
66
67static UBool gIncludeCopyright = FALSE;
68static UBool gUsePoolBundle = FALSE;
69static UBool gIsDefaultFormatVersion = TRUE;
70static int32_t gFormatVersion = 3;
71
72/* How do we store string values? */
73enum {
74    STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
75    STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
76};
77
78static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40;  /* do not store the length explicitly for such strings */
79
80static const ResFile kNoPoolBundle;
81
82/*
83 * res_none() returns the address of kNoResource,
84 * for use in non-error cases when no resource is to be added to the bundle.
85 * (NULL is used in error cases.)
86 */
87static SResource kNoResource;  // TODO: const
88
89static UDataInfo dataInfo= {
90    sizeof(UDataInfo),
91    0,
92
93    U_IS_BIG_ENDIAN,
94    U_CHARSET_FAMILY,
95    sizeof(UChar),
96    0,
97
98    {0x52, 0x65, 0x73, 0x42},     /* dataFormat="ResB" */
99    {1, 3, 0, 0},                 /* formatVersion */
100    {1, 4, 0, 0}                  /* dataVersion take a look at version inside parsed resb*/
101};
102
103static const UVersionInfo gFormatVersions[4] = {  /* indexed by a major-formatVersion integer */
104    { 0, 0, 0, 0 },
105    { 1, 3, 0, 0 },
106    { 2, 0, 0, 0 },
107    { 3, 0, 0, 0 }
108};
109// Remember to update genrb.h GENRB_VERSION when changing the data format.
110// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
111
112static uint8_t calcPadding(uint32_t size) {
113    /* returns space we need to pad */
114    return (uint8_t) ((size % sizeof(uint32_t)) ? (sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0);
115
116}
117
118void setIncludeCopyright(UBool val){
119    gIncludeCopyright=val;
120}
121
122UBool getIncludeCopyright(void){
123    return gIncludeCopyright;
124}
125
126void setFormatVersion(int32_t formatVersion) {
127    gIsDefaultFormatVersion = FALSE;
128    gFormatVersion = formatVersion;
129}
130
131int32_t getFormatVersion() {
132    return gFormatVersion;
133}
134
135void setUsePoolBundle(UBool use) {
136    gUsePoolBundle = use;
137}
138
139// TODO: return const pointer, or find another way to express "none"
140struct SResource* res_none() {
141    return &kNoResource;
142}
143
144SResource::SResource()
145        : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
146          line(0), fNext(NULL) {
147    ustr_init(&fComment);
148}
149
150SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
151                     UErrorCode &errorCode)
152        : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
153          fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
154          line(0), fNext(NULL) {
155    ustr_init(&fComment);
156    if(comment != NULL) {
157        ustr_cpy(&fComment, comment, &errorCode);
158    }
159}
160
161SResource::~SResource() {
162    ustr_deinit(&fComment);
163}
164
165ContainerResource::~ContainerResource() {
166    SResource *current = fFirst;
167    while (current != NULL) {
168        SResource *next = current->fNext;
169        delete current;
170        current = next;
171    }
172}
173
174TableResource::~TableResource() {}
175
176// TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
177void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
178    if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
179        return;
180    }
181
182    /* remember this linenumber to report to the user if there is a duplicate key */
183    res->line = linenumber;
184
185    /* here we need to traverse the list */
186    ++fCount;
187
188    /* is the list still empty? */
189    if (fFirst == NULL) {
190        fFirst = res;
191        res->fNext = NULL;
192        return;
193    }
194
195    const char *resKeyString = fRoot->fKeys + res->fKey;
196
197    SResource *current = fFirst;
198
199    SResource *prev = NULL;
200    while (current != NULL) {
201        const char *currentKeyString = fRoot->fKeys + current->fKey;
202        int diff;
203        /*
204         * formatVersion 1: compare key strings in native-charset order
205         * formatVersion 2 and up: compare key strings in ASCII order
206         */
207        if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
208            diff = uprv_strcmp(currentKeyString, resKeyString);
209        } else {
210            diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
211        }
212        if (diff < 0) {
213            prev    = current;
214            current = current->fNext;
215        } else if (diff > 0) {
216            /* we're either in front of the list, or in the middle */
217            if (prev == NULL) {
218                /* front of the list */
219                fFirst = res;
220            } else {
221                /* middle of the list */
222                prev->fNext = res;
223            }
224
225            res->fNext = current;
226            return;
227        } else {
228            /* Key already exists! ERROR! */
229            error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
230            errorCode = U_UNSUPPORTED_ERROR;
231            return;
232        }
233    }
234
235    /* end of list */
236    prev->fNext = res;
237    res->fNext  = NULL;
238}
239
240ArrayResource::~ArrayResource() {}
241
242void ArrayResource::add(SResource *res) {
243    if (res != NULL && res != &kNoResource) {
244        if (fFirst == NULL) {
245            fFirst = res;
246        } else {
247            fLast->fNext = res;
248        }
249        fLast = res;
250        ++fCount;
251    }
252}
253
254PseudoListResource::~PseudoListResource() {}
255
256void PseudoListResource::add(SResource *res) {
257    if (res != NULL && res != &kNoResource) {
258        res->fNext = fFirst;
259        fFirst = res;
260        ++fCount;
261    }
262}
263
264StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
265                                       const UChar *value, int32_t len,
266                                       const UString* comment, UErrorCode &errorCode)
267        : SResource(bundle, tag, type, comment, errorCode) {
268    if (len == 0 && gFormatVersion > 1) {
269        fRes = URES_MAKE_EMPTY_RESOURCE(type);
270        fWritten = TRUE;
271        return;
272    }
273
274    fString.setTo(ConstChar16Ptr(value), len);
275    fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
276    if (U_SUCCESS(errorCode) && fString.isBogus()) {
277        errorCode = U_MEMORY_ALLOCATION_ERROR;
278    }
279}
280
281StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
282                                       const icu::UnicodeString &value, UErrorCode &errorCode)
283        : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
284    if (value.isEmpty() && gFormatVersion > 1) {
285        fRes = URES_MAKE_EMPTY_RESOURCE(type);
286        fWritten = TRUE;
287        return;
288    }
289
290    fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
291    if (U_SUCCESS(errorCode) && fString.isBogus()) {
292        errorCode = U_MEMORY_ALLOCATION_ERROR;
293    }
294}
295
296// Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
297StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
298                                       UErrorCode &errorCode)
299        : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
300    assert(len > 0);
301    assert(!fString.isBogus());
302}
303
304StringBaseResource::~StringBaseResource() {}
305
306static int32_t U_CALLCONV
307string_hash(const UElement key) {
308    const StringResource *res = static_cast<const StringResource *>(key.pointer);
309    return res->fString.hashCode();
310}
311
312static UBool U_CALLCONV
313string_comp(const UElement key1, const UElement key2) {
314    const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
315    const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
316    return res1->fString == res2->fString;
317}
318
319StringResource::~StringResource() {}
320
321AliasResource::~AliasResource() {}
322
323IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
324                         const UString* comment, UErrorCode &errorCode)
325        : SResource(bundle, tag, URES_INT, comment, errorCode) {
326    fValue = value;
327    fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
328    fWritten = TRUE;
329}
330
331IntResource::~IntResource() {}
332
333IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
334                  const UString* comment, UErrorCode &errorCode)
335        : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
336          fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
337    if (fArray == NULL) {
338        errorCode = U_MEMORY_ALLOCATION_ERROR;
339        return;
340    }
341}
342
343IntVectorResource::~IntVectorResource() {
344    delete[] fArray;
345}
346
347void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
348    if (U_SUCCESS(errorCode)) {
349        fArray[fCount++] = value;
350    }
351}
352
353BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
354                               uint32_t length, uint8_t *data, const char* fileName,
355                               const UString* comment, UErrorCode &errorCode)
356        : SResource(bundle, tag, URES_BINARY, comment, errorCode),
357          fLength(length), fData(NULL), fFileName(NULL) {
358    if (U_FAILURE(errorCode)) {
359        return;
360    }
361    if (fileName != NULL && *fileName != 0){
362        fFileName = new char[uprv_strlen(fileName)+1];
363        if (fFileName == NULL) {
364            errorCode = U_MEMORY_ALLOCATION_ERROR;
365            return;
366        }
367        uprv_strcpy(fFileName, fileName);
368    }
369    if (length > 0) {
370        fData = new uint8_t[length];
371        if (fData == NULL) {
372            errorCode = U_MEMORY_ALLOCATION_ERROR;
373            return;
374        }
375        uprv_memcpy(fData, data, length);
376    } else {
377        if (gFormatVersion > 1) {
378            fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
379            fWritten = TRUE;
380        }
381    }
382}
383
384BinaryResource::~BinaryResource() {
385    delete[] fData;
386    delete[] fFileName;
387}
388
389/* Writing Functions */
390
391void
392StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
393                                       UErrorCode &errorCode) {
394    assert(fSame == NULL);
395    fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
396    if (fSame != NULL) {
397        // This is a duplicate of a pool bundle string or of an earlier-visited string.
398        if (++fSame->fNumCopies == 1) {
399            assert(fSame->fWritten);
400            int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
401            if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
402                bundle->fPoolStringIndexLimit = poolStringIndex + 1;
403            }
404        }
405        return;
406    }
407    /* Put this string into the set for finding duplicates. */
408    fNumCopies = 1;
409    uhash_put(stringSet, this, this, &errorCode);
410
411    if (bundle->fStringsForm != STRINGS_UTF16_V1) {
412        int32_t len = length();
413        if (len <= MAX_IMPLICIT_STRING_LENGTH &&
414                !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
415            /*
416             * This string will be stored without an explicit length.
417             * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
418             */
419            fNumCharsForLength = 0;
420        } else if (len <= 0x3ee) {
421            fNumCharsForLength = 1;
422        } else if (len <= 0xfffff) {
423            fNumCharsForLength = 2;
424        } else {
425            fNumCharsForLength = 3;
426        }
427        bundle->f16BitStringsLength += fNumCharsForLength + len + 1;  /* +1 for the NUL */
428    }
429}
430
431void
432ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
433                                          UErrorCode &errorCode) {
434    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
435        current->preflightStrings(bundle, stringSet, errorCode);
436    }
437}
438
439void
440SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
441    if (U_FAILURE(errorCode)) {
442        return;
443    }
444    if (fRes != RES_BOGUS) {
445        /*
446         * The resource item word was already precomputed, which means
447         * no further data needs to be written.
448         * This might be an integer, or an empty string/binary/etc.
449         */
450        return;
451    }
452    handlePreflightStrings(bundle, stringSet, errorCode);
453}
454
455void
456SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
457                                  UErrorCode & /*errorCode*/) {
458    /* Neither a string nor a container. */
459}
460
461int32_t
462SRBRoot::makeRes16(uint32_t resWord) const {
463    if (resWord == 0) {
464        return 0;  /* empty string */
465    }
466    uint32_t type = RES_GET_TYPE(resWord);
467    int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
468    if (type == URES_STRING_V2) {
469        assert(offset > 0);
470        if (offset < fPoolStringIndexLimit) {
471            if (offset < fPoolStringIndex16Limit) {
472                return offset;
473            }
474        } else {
475            offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
476            if (offset <= 0xffff) {
477                return offset;
478            }
479        }
480    }
481    return -1;
482}
483
484int32_t
485SRBRoot::mapKey(int32_t oldpos) const {
486    const KeyMapEntry *map = fKeyMap;
487    if (map == NULL) {
488        return oldpos;
489    }
490    int32_t i, start, limit;
491
492    /* do a binary search for the old, pre-compactKeys() key offset */
493    start = fUsePoolBundle->fKeysCount;
494    limit = start + fKeysCount;
495    while (start < limit - 1) {
496        i = (start + limit) / 2;
497        if (oldpos < map[i].oldpos) {
498            limit = i;
499        } else {
500            start = i;
501        }
502    }
503    assert(oldpos == map[start].oldpos);
504    return map[start].newpos;
505}
506
507/*
508 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
509 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
510 * and exits early.
511 */
512void
513StringResource::handleWrite16(SRBRoot * /*bundle*/) {
514    SResource *same;
515    if ((same = fSame) != NULL) {
516        /* This is a duplicate. */
517        assert(same->fRes != RES_BOGUS && same->fWritten);
518        fRes = same->fRes;
519        fWritten = same->fWritten;
520    }
521}
522
523void
524ContainerResource::writeAllRes16(SRBRoot *bundle) {
525    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
526        bundle->f16BitUnits.append((UChar)current->fRes16);
527    }
528    fWritten = TRUE;
529}
530
531void
532ArrayResource::handleWrite16(SRBRoot *bundle) {
533    if (fCount == 0 && gFormatVersion > 1) {
534        fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
535        fWritten = TRUE;
536        return;
537    }
538
539    int32_t res16 = 0;
540    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
541        current->write16(bundle);
542        res16 |= current->fRes16;
543    }
544    if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
545        fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
546        bundle->f16BitUnits.append((UChar)fCount);
547        writeAllRes16(bundle);
548    }
549}
550
551void
552TableResource::handleWrite16(SRBRoot *bundle) {
553    if (fCount == 0 && gFormatVersion > 1) {
554        fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
555        fWritten = TRUE;
556        return;
557    }
558    /* Find the smallest table type that fits the data. */
559    int32_t key16 = 0;
560    int32_t res16 = 0;
561    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
562        current->write16(bundle);
563        key16 |= current->fKey16;
564        res16 |= current->fRes16;
565    }
566    if(fCount > (uint32_t)bundle->fMaxTableLength) {
567        bundle->fMaxTableLength = fCount;
568    }
569    if (fCount <= 0xffff && key16 >= 0) {
570        if (res16 >= 0 && gFormatVersion > 1) {
571            /* 16-bit count, key offsets and values */
572            fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
573            bundle->f16BitUnits.append((UChar)fCount);
574            for (SResource *current = fFirst; current != NULL; current = current->fNext) {
575                bundle->f16BitUnits.append((UChar)current->fKey16);
576            }
577            writeAllRes16(bundle);
578        } else {
579            /* 16-bit count, 16-bit key offsets, 32-bit values */
580            fTableType = URES_TABLE;
581        }
582    } else {
583        /* 32-bit count, key offsets and values */
584        fTableType = URES_TABLE32;
585    }
586}
587
588void
589PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
590    fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
591    fWritten = TRUE;
592}
593
594void
595SResource::write16(SRBRoot *bundle) {
596    if (fKey >= 0) {
597        // A tagged resource has a non-negative key index into the parsed key strings.
598        // compactKeys() built a map from parsed key index to the final key index.
599        // After the mapping, negative key indexes are used for shared pool bundle keys.
600        fKey = bundle->mapKey(fKey);
601        // If the key index fits into a Key16 for a Table or Table16,
602        // then set the fKey16 field accordingly.
603        // Otherwise keep it at -1.
604        if (fKey >= 0) {
605            if (fKey < bundle->fLocalKeyLimit) {
606                fKey16 = fKey;
607            }
608        } else {
609            int32_t poolKeyIndex = fKey & 0x7fffffff;
610            if (poolKeyIndex <= 0xffff) {
611                poolKeyIndex += bundle->fLocalKeyLimit;
612                if (poolKeyIndex <= 0xffff) {
613                    fKey16 = poolKeyIndex;
614                }
615            }
616        }
617    }
618    /*
619     * fRes != RES_BOGUS:
620     * The resource item word was already precomputed, which means
621     * no further data needs to be written.
622     * This might be an integer, or an empty or UTF-16 v2 string,
623     * an empty binary, etc.
624     */
625    if (fRes == RES_BOGUS) {
626        handleWrite16(bundle);
627    }
628    // Compute fRes16 for precomputed as well as just-computed fRes.
629    fRes16 = bundle->makeRes16(fRes);
630}
631
632void
633SResource::handleWrite16(SRBRoot * /*bundle*/) {
634    /* Only a few resource types write 16-bit units. */
635}
636
637/*
638 * Only called for UTF-16 v1 strings, and for aliases.
639 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
640 * and exits early.
641 */
642void
643StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
644    /* Write the UTF-16 v1 string. */
645    fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
646    *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
647}
648
649void
650IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
651    if (fCount == 0 && gFormatVersion > 1) {
652        fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
653        fWritten = TRUE;
654    } else {
655        fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
656        *byteOffset += (1 + fCount) * 4;
657    }
658}
659
660void
661BinaryResource::handlePreWrite(uint32_t *byteOffset) {
662    uint32_t pad       = 0;
663    uint32_t dataStart = *byteOffset + sizeof(fLength);
664
665    if (dataStart % BIN_ALIGNMENT) {
666        pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
667        *byteOffset += pad;  /* pad == 4 or 8 or 12 */
668    }
669    fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
670    *byteOffset += 4 + fLength;
671}
672
673void
674ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
675    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
676        current->preWrite(byteOffset);
677    }
678}
679
680void
681ArrayResource::handlePreWrite(uint32_t *byteOffset) {
682    preWriteAllRes(byteOffset);
683    fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
684    *byteOffset += (1 + fCount) * 4;
685}
686
687void
688TableResource::handlePreWrite(uint32_t *byteOffset) {
689    preWriteAllRes(byteOffset);
690    if (fTableType == URES_TABLE) {
691        /* 16-bit count, 16-bit key offsets, 32-bit values */
692        fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
693        *byteOffset += 2 + fCount * 6;
694    } else {
695        /* 32-bit count, key offsets and values */
696        fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
697        *byteOffset += 4 + fCount * 8;
698    }
699}
700
701void
702SResource::preWrite(uint32_t *byteOffset) {
703    if (fRes != RES_BOGUS) {
704        /*
705         * The resource item word was already precomputed, which means
706         * no further data needs to be written.
707         * This might be an integer, or an empty or UTF-16 v2 string,
708         * an empty binary, etc.
709         */
710        return;
711    }
712    handlePreWrite(byteOffset);
713    *byteOffset += calcPadding(*byteOffset);
714}
715
716void
717SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
718    assert(FALSE);
719}
720
721/*
722 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
723 * write() sees fWritten and exits early.
724 */
725void
726StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
727    /* Write the UTF-16 v1 string. */
728    int32_t len = length();
729    udata_write32(mem, len);
730    udata_writeUString(mem, getBuffer(), len + 1);
731    *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
732    fWritten = TRUE;
733}
734
735void
736ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
737    uint32_t i = 0;
738    for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
739        current->write(mem, byteOffset);
740    }
741    assert(i == fCount);
742}
743
744void
745ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
746    for (SResource *current = fFirst; current != NULL; current = current->fNext) {
747        udata_write32(mem, current->fRes);
748    }
749    *byteOffset += fCount * 4;
750}
751
752void
753ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
754    writeAllRes(mem, byteOffset);
755    udata_write32(mem, fCount);
756    *byteOffset += 4;
757    writeAllRes32(mem, byteOffset);
758}
759
760void
761IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
762    udata_write32(mem, fCount);
763    for(uint32_t i = 0; i < fCount; ++i) {
764      udata_write32(mem, fArray[i]);
765    }
766    *byteOffset += (1 + fCount) * 4;
767}
768
769void
770BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
771    uint32_t pad       = 0;
772    uint32_t dataStart = *byteOffset + sizeof(fLength);
773
774    if (dataStart % BIN_ALIGNMENT) {
775        pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
776        udata_writePadding(mem, pad);  /* pad == 4 or 8 or 12 */
777        *byteOffset += pad;
778    }
779
780    udata_write32(mem, fLength);
781    if (fLength > 0) {
782        udata_writeBlock(mem, fData, fLength);
783    }
784    *byteOffset += 4 + fLength;
785}
786
787void
788TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
789    writeAllRes(mem, byteOffset);
790    if(fTableType == URES_TABLE) {
791        udata_write16(mem, (uint16_t)fCount);
792        for (SResource *current = fFirst; current != NULL; current = current->fNext) {
793            udata_write16(mem, current->fKey16);
794        }
795        *byteOffset += (1 + fCount)* 2;
796        if ((fCount & 1) == 0) {
797            /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
798            udata_writePadding(mem, 2);
799            *byteOffset += 2;
800        }
801    } else /* URES_TABLE32 */ {
802        udata_write32(mem, fCount);
803        for (SResource *current = fFirst; current != NULL; current = current->fNext) {
804            udata_write32(mem, (uint32_t)current->fKey);
805        }
806        *byteOffset += (1 + fCount)* 4;
807    }
808    writeAllRes32(mem, byteOffset);
809}
810
811void
812SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
813    if (fWritten) {
814        assert(fRes != RES_BOGUS);
815        return;
816    }
817    handleWrite(mem, byteOffset);
818    uint8_t paddingSize = calcPadding(*byteOffset);
819    if (paddingSize > 0) {
820        udata_writePadding(mem, paddingSize);
821        *byteOffset += paddingSize;
822    }
823    fWritten = TRUE;
824}
825
826void
827SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
828    assert(FALSE);
829}
830
831void SRBRoot::write(const char *outputDir, const char *outputPkg,
832                    char *writtenFilename, int writtenFilenameLen,
833                    UErrorCode &errorCode) {
834    UNewDataMemory *mem        = NULL;
835    uint32_t        byteOffset = 0;
836    uint32_t        top, size;
837    char            dataName[1024];
838    int32_t         indexes[URES_INDEX_TOP];
839
840    compactKeys(errorCode);
841    /*
842     * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
843     * Safe because the capacity is a multiple of 4.
844     */
845    while (fKeysTop & 3) {
846        fKeys[fKeysTop++] = (char)0xaa;
847    }
848    /*
849     * In URES_TABLE, use all local key offsets that fit into 16 bits,
850     * and use the remaining 16-bit offsets for pool key offsets
851     * if there are any.
852     * If there are no local keys, then use the whole 16-bit space
853     * for pool key offsets.
854     * Note: This cannot be changed without changing the major formatVersion.
855     */
856    if (fKeysBottom < fKeysTop) {
857        if (fKeysTop <= 0x10000) {
858            fLocalKeyLimit = fKeysTop;
859        } else {
860            fLocalKeyLimit = 0x10000;
861        }
862    } else {
863        fLocalKeyLimit = 0;
864    }
865
866    UHashtable *stringSet;
867    if (gFormatVersion > 1) {
868        stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
869        if (U_SUCCESS(errorCode) &&
870                fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
871            for (SResource *current = fUsePoolBundle->fStrings->fFirst;
872                    current != NULL;
873                    current = current->fNext) {
874                StringResource *sr = static_cast<StringResource *>(current);
875                sr->fNumCopies = 0;
876                sr->fNumUnitsSaved = 0;
877                uhash_put(stringSet, sr, sr, &errorCode);
878            }
879        }
880        fRoot->preflightStrings(this, stringSet, errorCode);
881    } else {
882        stringSet = NULL;
883    }
884    if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
885        compactStringsV2(stringSet, errorCode);
886    }
887    uhash_close(stringSet);
888    if (U_FAILURE(errorCode)) {
889        return;
890    }
891
892    int32_t formatVersion = gFormatVersion;
893    if (fPoolStringIndexLimit != 0) {
894        int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
895        if ((sum - 1) > RES_MAX_OFFSET) {
896            errorCode = U_BUFFER_OVERFLOW_ERROR;
897            return;
898        }
899        if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
900            // 16-bit indexes work for all pool + local strings.
901            fPoolStringIndex16Limit = fPoolStringIndexLimit;
902        } else {
903            // Set the pool index threshold so that 16-bit indexes work
904            // for some pool strings and some local strings.
905            fPoolStringIndex16Limit = (int32_t)(
906                    ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
907        }
908    } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
909        // If we just default to formatVersion 3
910        // but there are no pool bundle strings to share
911        // and we do not write a pool bundle,
912        // then write formatVersion 2 which is just as good.
913        formatVersion = 2;
914    }
915
916    fRoot->write16(this);
917    if (f16BitUnits.isBogus()) {
918        errorCode = U_MEMORY_ALLOCATION_ERROR;
919        return;
920    }
921    if (f16BitUnits.length() & 1) {
922        f16BitUnits.append((UChar)0xaaaa);  /* pad to multiple of 4 bytes */
923    }
924    /* all keys have been mapped */
925    uprv_free(fKeyMap);
926    fKeyMap = NULL;
927
928    byteOffset = fKeysTop + f16BitUnits.length() * 2;
929    fRoot->preWrite(&byteOffset);
930
931    /* total size including the root item */
932    top = byteOffset;
933
934    if (writtenFilename && writtenFilenameLen) {
935        *writtenFilename = 0;
936    }
937
938    if (writtenFilename) {
939       int32_t off = 0, len = 0;
940       if (outputDir) {
941           len = (int32_t)uprv_strlen(outputDir);
942           if (len > writtenFilenameLen) {
943               len = writtenFilenameLen;
944           }
945           uprv_strncpy(writtenFilename, outputDir, len);
946       }
947       if (writtenFilenameLen -= len) {
948           off += len;
949           writtenFilename[off] = U_FILE_SEP_CHAR;
950           if (--writtenFilenameLen) {
951               ++off;
952               if(outputPkg != NULL)
953               {
954                   uprv_strcpy(writtenFilename+off, outputPkg);
955                   off += (int32_t)uprv_strlen(outputPkg);
956                   writtenFilename[off] = '_';
957                   ++off;
958               }
959
960               len = (int32_t)uprv_strlen(fLocale);
961               if (len > writtenFilenameLen) {
962                   len = writtenFilenameLen;
963               }
964               uprv_strncpy(writtenFilename + off, fLocale, len);
965               if (writtenFilenameLen -= len) {
966                   off += len;
967                   len = 5;
968                   if (len > writtenFilenameLen) {
969                       len = writtenFilenameLen;
970                   }
971                   uprv_strncpy(writtenFilename +  off, ".res", len);
972               }
973           }
974       }
975    }
976
977    if(outputPkg)
978    {
979        uprv_strcpy(dataName, outputPkg);
980        uprv_strcat(dataName, "_");
981        uprv_strcat(dataName, fLocale);
982    }
983    else
984    {
985        uprv_strcpy(dataName, fLocale);
986    }
987
988    uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
989
990    mem = udata_create(outputDir, "res", dataName,
991                       &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
992    if(U_FAILURE(errorCode)){
993        return;
994    }
995
996    /* write the root item */
997    udata_write32(mem, fRoot->fRes);
998
999    /*
1000     * formatVersion 1.1 (ICU 2.8):
1001     * write int32_t indexes[] after root and before the key strings
1002     * to make it easier to parse resource bundles in icuswap or from Java etc.
1003     */
1004    uprv_memset(indexes, 0, sizeof(indexes));
1005    indexes[URES_INDEX_LENGTH]=             fIndexLength;
1006    indexes[URES_INDEX_KEYS_TOP]=           fKeysTop>>2;
1007    indexes[URES_INDEX_RESOURCES_TOP]=      (int32_t)(top>>2);
1008    indexes[URES_INDEX_BUNDLE_TOP]=         indexes[URES_INDEX_RESOURCES_TOP];
1009    indexes[URES_INDEX_MAX_TABLE_LENGTH]=   fMaxTableLength;
1010
1011    /*
1012     * formatVersion 1.2 (ICU 3.6):
1013     * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
1014     * the memset() above initialized all indexes[] to 0
1015     */
1016    if (fNoFallback) {
1017        indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
1018    }
1019    /*
1020     * formatVersion 2.0 (ICU 4.4):
1021     * more compact string value storage, optional pool bundle
1022     */
1023    if (URES_INDEX_16BIT_TOP < fIndexLength) {
1024        indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
1025    }
1026    if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
1027        if (fIsPoolBundle) {
1028            indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
1029            uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
1030                                           (uint32_t)(fKeysTop - fKeysBottom), 0);
1031            if (f16BitUnits.length() <= 1) {
1032                // no pool strings to checksum
1033            } else if (U_IS_BIG_ENDIAN) {
1034                checksum = computeCRC(reinterpret_cast<const char *>(f16BitUnits.getBuffer()),
1035                                      (uint32_t)f16BitUnits.length() * 2, checksum);
1036            } else {
1037                // Swap to big-endian so we get the same checksum on all platforms
1038                // (except for charset family, due to the key strings).
1039                UnicodeString s(f16BitUnits);
1040                s.append((UChar)1);  // Ensure that we own this buffer.
1041                assert(!s.isBogus());
1042                uint16_t *p = const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(s.getBuffer()));
1043                for (int32_t count = f16BitUnits.length(); count > 0; --count) {
1044                    uint16_t x = *p;
1045                    *p++ = (uint16_t)((x << 8) | (x >> 8));
1046                }
1047                checksum = computeCRC((const char *)p,
1048                                      (uint32_t)f16BitUnits.length() * 2, checksum);
1049            }
1050            indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
1051        } else if (gUsePoolBundle) {
1052            indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
1053            indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
1054        }
1055    }
1056    // formatVersion 3 (ICU 56):
1057    // share string values via pool bundle strings
1058    indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8;  // bits 23..0 -> 31..8
1059    indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000;  // bits 27..24 -> 15..12
1060    indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
1061
1062    /* write the indexes[] */
1063    udata_writeBlock(mem, indexes, fIndexLength*4);
1064
1065    /* write the table key strings */
1066    udata_writeBlock(mem, fKeys+fKeysBottom,
1067                          fKeysTop-fKeysBottom);
1068
1069    /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
1070    udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
1071
1072    /* write all of the bundle contents: the root item and its children */
1073    byteOffset = fKeysTop + f16BitUnits.length() * 2;
1074    fRoot->write(mem, &byteOffset);
1075    assert(byteOffset == top);
1076
1077    size = udata_finish(mem, &errorCode);
1078    if(top != size) {
1079        fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
1080                (int)size, (int)top);
1081        errorCode = U_INTERNAL_PROGRAM_ERROR;
1082    }
1083}
1084
1085/* Opening Functions */
1086
1087TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1088    LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
1089    return U_SUCCESS(*status) ? res.orphan() : NULL;
1090}
1091
1092ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1093    LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
1094    return U_SUCCESS(*status) ? res.orphan() : NULL;
1095}
1096
1097struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1098    LocalPointer<SResource> res(
1099            new StringResource(bundle, tag, value, len, comment, *status), *status);
1100    return U_SUCCESS(*status) ? res.orphan() : NULL;
1101}
1102
1103struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1104    LocalPointer<SResource> res(
1105            new AliasResource(bundle, tag, value, len, comment, *status), *status);
1106    return U_SUCCESS(*status) ? res.orphan() : NULL;
1107}
1108
1109IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1110    LocalPointer<IntVectorResource> res(
1111            new IntVectorResource(bundle, tag, comment, *status), *status);
1112    return U_SUCCESS(*status) ? res.orphan() : NULL;
1113}
1114
1115struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
1116    LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
1117    return U_SUCCESS(*status) ? res.orphan() : NULL;
1118}
1119
1120struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
1121    LocalPointer<SResource> res(
1122            new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
1123    return U_SUCCESS(*status) ? res.orphan() : NULL;
1124}
1125
1126SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
1127        : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
1128          fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
1129          fKeys(NULL), fKeyMap(NULL),
1130          fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
1131          f16BitUnits(), f16BitStringsLength(0),
1132          fUsePoolBundle(&kNoPoolBundle),
1133          fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
1134          fWritePoolBundle(NULL) {
1135    if (U_FAILURE(errorCode)) {
1136        return;
1137    }
1138
1139    if (gFormatVersion > 1) {
1140        // f16BitUnits must start with a zero for empty resources.
1141        // We might be able to omit it if there are no empty 16-bit resources.
1142        f16BitUnits.append((UChar)0);
1143    }
1144
1145    fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
1146    if (isPoolBundle) {
1147        fRoot = new PseudoListResource(this, errorCode);
1148    } else {
1149        fRoot = new TableResource(this, NULL, comment, errorCode);
1150    }
1151    if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
1152        if (U_SUCCESS(errorCode)) {
1153            errorCode = U_MEMORY_ALLOCATION_ERROR;
1154        }
1155        return;
1156    }
1157
1158    fKeysCapacity = KEY_SPACE_SIZE;
1159    /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
1160    if (gUsePoolBundle || isPoolBundle) {
1161        fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
1162    } else if (gFormatVersion >= 2) {
1163        fIndexLength = URES_INDEX_16BIT_TOP + 1;
1164    } else /* formatVersion 1 */ {
1165        fIndexLength = URES_INDEX_ATTRIBUTES + 1;
1166    }
1167    fKeysBottom = (1 /* root */ + fIndexLength) * 4;
1168    uprv_memset(fKeys, 0, fKeysBottom);
1169    fKeysTop = fKeysBottom;
1170
1171    if (gFormatVersion == 1) {
1172        fStringsForm = STRINGS_UTF16_V1;
1173    } else {
1174        fStringsForm = STRINGS_UTF16_V2;
1175    }
1176}
1177
1178/* Closing Functions */
1179
1180void res_close(struct SResource *res) {
1181    delete res;
1182}
1183
1184SRBRoot::~SRBRoot() {
1185    delete fRoot;
1186    uprv_free(fLocale);
1187    uprv_free(fKeys);
1188    uprv_free(fKeyMap);
1189}
1190
1191/* Misc Functions */
1192
1193void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
1194    if(U_FAILURE(errorCode)) {
1195        return;
1196    }
1197
1198    uprv_free(fLocale);
1199    fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
1200    if(fLocale == NULL) {
1201        errorCode = U_MEMORY_ALLOCATION_ERROR;
1202        return;
1203    }
1204
1205    u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
1206}
1207
1208const char *
1209SRBRoot::getKeyString(int32_t key) const {
1210    if (key < 0) {
1211        return fUsePoolBundle->fKeys + (key & 0x7fffffff);
1212    } else {
1213        return fKeys + key;
1214    }
1215}
1216
1217const char *
1218SResource::getKeyString(const SRBRoot *bundle) const {
1219    if (fKey == -1) {
1220        return NULL;
1221    }
1222    return bundle->getKeyString(fKey);
1223}
1224
1225const char *
1226SRBRoot::getKeyBytes(int32_t *pLength) const {
1227    *pLength = fKeysTop - fKeysBottom;
1228    return fKeys + fKeysBottom;
1229}
1230
1231int32_t
1232SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
1233    int32_t keypos;
1234
1235    if (U_FAILURE(errorCode)) {
1236        return -1;
1237    }
1238    if (length < 0 || (keyBytes == NULL && length != 0)) {
1239        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1240        return -1;
1241    }
1242    if (length == 0) {
1243        return fKeysTop;
1244    }
1245
1246    keypos = fKeysTop;
1247    fKeysTop += length;
1248    if (fKeysTop >= fKeysCapacity) {
1249        /* overflow - resize the keys buffer */
1250        fKeysCapacity += KEY_SPACE_SIZE;
1251        fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
1252        if(fKeys == NULL) {
1253            errorCode = U_MEMORY_ALLOCATION_ERROR;
1254            return -1;
1255        }
1256    }
1257
1258    uprv_memcpy(fKeys + keypos, keyBytes, length);
1259
1260    return keypos;
1261}
1262
1263int32_t
1264SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
1265    int32_t keypos;
1266
1267    if (U_FAILURE(errorCode)) {
1268        return -1;
1269    }
1270
1271    if (tag == NULL) {
1272        /* no error: the root table and array items have no keys */
1273        return -1;
1274    }
1275
1276    keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
1277    if (U_SUCCESS(errorCode)) {
1278        ++fKeysCount;
1279    }
1280    return keypos;
1281}
1282
1283static int32_t
1284compareInt32(int32_t lPos, int32_t rPos) {
1285    /*
1286     * Compare possibly-negative key offsets. Don't just return lPos - rPos
1287     * because that is prone to negative-integer underflows.
1288     */
1289    if (lPos < rPos) {
1290        return -1;
1291    } else if (lPos > rPos) {
1292        return 1;
1293    } else {
1294        return 0;
1295    }
1296}
1297
1298static int32_t U_CALLCONV
1299compareKeySuffixes(const void *context, const void *l, const void *r) {
1300    const struct SRBRoot *bundle=(const struct SRBRoot *)context;
1301    int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
1302    int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
1303    const char *lStart = bundle->getKeyString(lPos);
1304    const char *lLimit = lStart;
1305    const char *rStart = bundle->getKeyString(rPos);
1306    const char *rLimit = rStart;
1307    int32_t diff;
1308    while (*lLimit != 0) { ++lLimit; }
1309    while (*rLimit != 0) { ++rLimit; }
1310    /* compare keys in reverse character order */
1311    while (lStart < lLimit && rStart < rLimit) {
1312        diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
1313        if (diff != 0) {
1314            return diff;
1315        }
1316    }
1317    /* sort equal suffixes by descending key length */
1318    diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
1319    if (diff != 0) {
1320        return diff;
1321    }
1322    /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
1323    return compareInt32(lPos, rPos);
1324}
1325
1326static int32_t U_CALLCONV
1327compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
1328    return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
1329}
1330
1331static int32_t U_CALLCONV
1332compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
1333    return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
1334}
1335
1336void
1337SRBRoot::compactKeys(UErrorCode &errorCode) {
1338    KeyMapEntry *map;
1339    char *keys;
1340    int32_t i;
1341    int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
1342    if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
1343        return;
1344    }
1345    map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
1346    if (map == NULL) {
1347        errorCode = U_MEMORY_ALLOCATION_ERROR;
1348        return;
1349    }
1350    keys = (char *)fUsePoolBundle->fKeys;
1351    for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
1352        map[i].oldpos =
1353            (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
1354        map[i].newpos = 0;
1355        while (*keys != 0) { ++keys; }  /* skip the key */
1356        ++keys;  /* skip the NUL */
1357    }
1358    keys = fKeys + fKeysBottom;
1359    for (; i < keysCount; ++i) {
1360        map[i].oldpos = (int32_t)(keys - fKeys);
1361        map[i].newpos = 0;
1362        while (*keys != 0) { ++keys; }  /* skip the key */
1363        ++keys;  /* skip the NUL */
1364    }
1365    /* Sort the keys so that each one is immediately followed by all of its suffixes. */
1366    uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1367                   compareKeySuffixes, this, FALSE, &errorCode);
1368    /*
1369     * Make suffixes point into earlier, longer strings that contain them
1370     * and mark the old, now unused suffix bytes as deleted.
1371     */
1372    if (U_SUCCESS(errorCode)) {
1373        keys = fKeys;
1374        for (i = 0; i < keysCount;) {
1375            /*
1376             * This key is not a suffix of the previous one;
1377             * keep this one and delete the following ones that are
1378             * suffixes of this one.
1379             */
1380            const char *key;
1381            const char *keyLimit;
1382            int32_t j = i + 1;
1383            map[i].newpos = map[i].oldpos;
1384            if (j < keysCount && map[j].oldpos < 0) {
1385                /* Key string from the pool bundle, do not delete. */
1386                i = j;
1387                continue;
1388            }
1389            key = getKeyString(map[i].oldpos);
1390            for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
1391            for (; j < keysCount && map[j].oldpos >= 0; ++j) {
1392                const char *k;
1393                char *suffix;
1394                const char *suffixLimit;
1395                int32_t offset;
1396                suffix = keys + map[j].oldpos;
1397                for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
1398                offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
1399                if (offset < 0) {
1400                    break;  /* suffix cannot be longer than the original */
1401                }
1402                /* Is it a suffix of the earlier, longer key? */
1403                for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
1404                if (suffix == suffixLimit && *k == *suffixLimit) {
1405                    map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
1406                    /* mark the suffix as deleted */
1407                    while (*suffix != 0) { *suffix++ = 1; }
1408                    *suffix = 1;
1409                } else {
1410                    break;  /* not a suffix, restart from here */
1411                }
1412            }
1413            i = j;
1414        }
1415        /*
1416         * Re-sort by newpos, then modify the key characters array in-place
1417         * to squeeze out unused bytes, and readjust the newpos offsets.
1418         */
1419        uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1420                       compareKeyNewpos, NULL, FALSE, &errorCode);
1421        if (U_SUCCESS(errorCode)) {
1422            int32_t oldpos, newpos, limit;
1423            oldpos = newpos = fKeysBottom;
1424            limit = fKeysTop;
1425            /* skip key offsets that point into the pool bundle rather than this new bundle */
1426            for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
1427            if (i < keysCount) {
1428                while (oldpos < limit) {
1429                    if (keys[oldpos] == 1) {
1430                        ++oldpos;  /* skip unused bytes */
1431                    } else {
1432                        /* adjust the new offsets for keys starting here */
1433                        while (i < keysCount && map[i].newpos == oldpos) {
1434                            map[i++].newpos = newpos;
1435                        }
1436                        /* move the key characters to their new position */
1437                        keys[newpos++] = keys[oldpos++];
1438                    }
1439                }
1440                assert(i == keysCount);
1441            }
1442            fKeysTop = newpos;
1443            /* Re-sort once more, by old offsets for binary searching. */
1444            uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1445                           compareKeyOldpos, NULL, FALSE, &errorCode);
1446            if (U_SUCCESS(errorCode)) {
1447                /* key size reduction by limit - newpos */
1448                fKeyMap = map;
1449                map = NULL;
1450            }
1451        }
1452    }
1453    uprv_free(map);
1454}
1455
1456static int32_t U_CALLCONV
1457compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
1458    const StringResource *left = *((const StringResource **)l);
1459    const StringResource *right = *((const StringResource **)r);
1460    const UChar *lStart = left->getBuffer();
1461    const UChar *lLimit = lStart + left->length();
1462    const UChar *rStart = right->getBuffer();
1463    const UChar *rLimit = rStart + right->length();
1464    int32_t diff;
1465    /* compare keys in reverse character order */
1466    while (lStart < lLimit && rStart < rLimit) {
1467        diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
1468        if (diff != 0) {
1469            return diff;
1470        }
1471    }
1472    /* sort equal suffixes by descending string length */
1473    return right->length() - left->length();
1474}
1475
1476static int32_t U_CALLCONV
1477compareStringLengths(const void * /*context*/, const void *l, const void *r) {
1478    const StringResource *left = *((const StringResource **)l);
1479    const StringResource *right = *((const StringResource **)r);
1480    int32_t diff;
1481    /* Make "is suffix of another string" compare greater than a non-suffix. */
1482    diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
1483    if (diff != 0) {
1484        return diff;
1485    }
1486    /* sort by ascending string length */
1487    diff = left->length() - right->length();
1488    if (diff != 0) {
1489        return diff;
1490    }
1491    // sort by descending size reduction
1492    diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
1493    if (diff != 0) {
1494        return diff;
1495    }
1496    // sort lexically
1497    return left->fString.compare(right->fString);
1498}
1499
1500void
1501StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
1502    int32_t len = length();
1503    fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
1504    fWritten = TRUE;
1505    switch(fNumCharsForLength) {
1506    case 0:
1507        break;
1508    case 1:
1509        dest.append((UChar)(0xdc00 + len));
1510        break;
1511    case 2:
1512        dest.append((UChar)(0xdfef + (len >> 16)));
1513        dest.append((UChar)len);
1514        break;
1515    case 3:
1516        dest.append((UChar)0xdfff);
1517        dest.append((UChar)(len >> 16));
1518        dest.append((UChar)len);
1519        break;
1520    default:
1521        break;  /* will not occur */
1522    }
1523    dest.append(fString);
1524    dest.append((UChar)0);
1525}
1526
1527void
1528SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
1529    if (U_FAILURE(errorCode)) {
1530        return;
1531    }
1532    // Store the StringResource pointers in an array for
1533    // easy sorting and processing.
1534    // We enumerate a set of strings, so there are no duplicates.
1535    int32_t count = uhash_count(stringSet);
1536    LocalArray<StringResource *> array(new StringResource *[count], errorCode);
1537    if (U_FAILURE(errorCode)) {
1538        return;
1539    }
1540    for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
1541        array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
1542    }
1543    /* Sort the strings so that each one is immediately followed by all of its suffixes. */
1544    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1545                   compareStringSuffixes, NULL, FALSE, &errorCode);
1546    if (U_FAILURE(errorCode)) {
1547        return;
1548    }
1549    /*
1550     * Make suffixes point into earlier, longer strings that contain them.
1551     * Temporarily use fSame and fSuffixOffset for suffix strings to
1552     * refer to the remaining ones.
1553     */
1554    for (int32_t i = 0; i < count;) {
1555        /*
1556         * This string is not a suffix of the previous one;
1557         * write this one and subsume the following ones that are
1558         * suffixes of this one.
1559         */
1560        StringResource *res = array[i];
1561        res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
1562        // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit,
1563        // see StringResource::handlePreflightStrings().
1564        int32_t j;
1565        for (j = i + 1; j < count; ++j) {
1566            StringResource *suffixRes = array[j];
1567            /* Is it a suffix of the earlier, longer string? */
1568            if (res->fString.endsWith(suffixRes->fString)) {
1569                assert(res->length() != suffixRes->length());  // Set strings are unique.
1570                if (suffixRes->fWritten) {
1571                    // Pool string, skip.
1572                } else if (suffixRes->fNumCharsForLength == 0) {
1573                    /* yes, point to the earlier string */
1574                    suffixRes->fSame = res;
1575                    suffixRes->fSuffixOffset = res->length() - suffixRes->length();
1576                    if (res->fWritten) {
1577                        // Suffix-share res which is a pool string.
1578                        // Compute the resource word and collect the maximum.
1579                        suffixRes->fRes =
1580                                res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
1581                        int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
1582                        if (poolStringIndex >= fPoolStringIndexLimit) {
1583                            fPoolStringIndexLimit = poolStringIndex + 1;
1584                        }
1585                        suffixRes->fWritten = TRUE;
1586                    }
1587                    res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
1588                } else {
1589                    /* write the suffix by itself if we need explicit length */
1590                }
1591            } else {
1592                break;  /* not a suffix, restart from here */
1593            }
1594        }
1595        i = j;
1596    }
1597    /*
1598     * Re-sort the strings by ascending length (except suffixes last)
1599     * to optimize for URES_TABLE16 and URES_ARRAY16:
1600     * Keep as many as possible within reach of 16-bit offsets.
1601     */
1602    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1603                   compareStringLengths, NULL, FALSE, &errorCode);
1604    if (U_FAILURE(errorCode)) {
1605        return;
1606    }
1607    if (fIsPoolBundle) {
1608        // Write strings that are sufficiently shared.
1609        // Avoid writing other strings.
1610        int32_t numStringsWritten = 0;
1611        int32_t numUnitsSaved = 0;
1612        int32_t numUnitsNotSaved = 0;
1613        for (int32_t i = 0; i < count; ++i) {
1614            StringResource *res = array[i];
1615            // Maximum pool string index when suffix-sharing the last character.
1616            int32_t maxStringIndex =
1617                    f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
1618            if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
1619                    maxStringIndex < RES_MAX_OFFSET) {
1620                res->writeUTF16v2(0, f16BitUnits);
1621                ++numStringsWritten;
1622                numUnitsSaved += res->fNumUnitsSaved;
1623            } else {
1624                numUnitsNotSaved += res->fNumUnitsSaved;
1625                res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
1626                res->fWritten = TRUE;
1627            }
1628        }
1629        if (f16BitUnits.isBogus()) {
1630            errorCode = U_MEMORY_ALLOCATION_ERROR;
1631        }
1632        if (getShowWarning()) {  // not quiet
1633            printf("number of shared strings: %d\n", (int)numStringsWritten);
1634            printf("16-bit units for strings: %6d = %6d bytes\n",
1635                   (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
1636            printf("16-bit units saved:       %6d = %6d bytes\n",
1637                   (int)numUnitsSaved, (int)numUnitsSaved * 2);
1638            printf("16-bit units not saved:   %6d = %6d bytes\n",
1639                   (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
1640        }
1641    } else {
1642        assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
1643        /* Write the non-suffix strings. */
1644        int32_t i;
1645        for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1646            StringResource *res = array[i];
1647            if (!res->fWritten) {
1648                int32_t localStringIndex = f16BitUnits.length();
1649                if (localStringIndex >= fLocalStringIndexLimit) {
1650                    fLocalStringIndexLimit = localStringIndex + 1;
1651                }
1652                res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
1653            }
1654        }
1655        if (f16BitUnits.isBogus()) {
1656            errorCode = U_MEMORY_ALLOCATION_ERROR;
1657            return;
1658        }
1659        if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
1660            PseudoListResource *poolStrings =
1661                    static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
1662            for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1663                assert(!array[i]->fString.isEmpty());
1664                StringResource *poolString =
1665                        new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
1666                if (poolString == NULL) {
1667                    errorCode = U_MEMORY_ALLOCATION_ERROR;
1668                    break;
1669                }
1670                poolStrings->add(poolString);
1671            }
1672        }
1673        /* Write the suffix strings. Make each point to the real string. */
1674        for (; i < count; ++i) {
1675            StringResource *res = array[i];
1676            if (res->fWritten) {
1677                continue;
1678            }
1679            StringResource *same = res->fSame;
1680            assert(res->length() != same->length());  // Set strings are unique.
1681            res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
1682            int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
1683            // Suffixes of pool strings have been set already.
1684            assert(localStringIndex >= 0);
1685            if (localStringIndex >= fLocalStringIndexLimit) {
1686                fLocalStringIndexLimit = localStringIndex + 1;
1687            }
1688            res->fWritten = TRUE;
1689        }
1690    }
1691    // +1 to account for the initial zero in f16BitUnits
1692    assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
1693}
1694