StringPool.cpp revision 2412f84064c26b643c722ce914a97c4ec7776c69
1//
2// Copyright 2006 The Android Open Source Project
3//
4// Build resource files from raw assets.
5//
6#include "StringPool.h"
7
8#include <utils/ByteOrder.h>
9#include <utils/SortedVector.h>
10
11#include <algorithm>
12
13#include "ResourceTable.h"
14
15// SSIZE: mingw does not have signed size_t == ssize_t.
16#if HAVE_PRINTF_ZD
17#  define ZD "%zd"
18#  define ZD_TYPE ssize_t
19#  define SSIZE(x) x
20#else
21#  define ZD "%ld"
22#  define ZD_TYPE long
23#  define SSIZE(x) (signed size_t)x
24#endif
25
26// Set to true for noisy debug output.
27static const bool kIsDebug = false;
28
29void strcpy16_htod(char16_t* dst, const char16_t* src)
30{
31    while (*src) {
32        char16_t s = htods(*src);
33        *dst++ = s;
34        src++;
35    }
36    *dst = 0;
37}
38
39void printStringPool(const ResStringPool* pool)
40{
41    SortedVector<const void*> uniqueStrings;
42    const size_t N = pool->size();
43    for (size_t i=0; i<N; i++) {
44        size_t len;
45        if (pool->isUTF8()) {
46            uniqueStrings.add(pool->string8At(i, &len));
47        } else {
48            uniqueStrings.add(pool->stringAt(i, &len));
49        }
50    }
51
52    printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
53            ZD " styles using " ZD " bytes:\n",
54            (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
55            pool->isSorted() ? "sorted" : "non-sorted",
56            (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
57
58    const size_t NS = pool->size();
59    for (size_t s=0; s<NS; s++) {
60        String8 str = pool->string8ObjectAt(s);
61        printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
62    }
63}
64
65String8 StringPool::entry::makeConfigsString() const {
66    String8 configStr(configTypeName);
67    if (configStr.size() > 0) configStr.append(" ");
68    if (configs.size() > 0) {
69        for (size_t j=0; j<configs.size(); j++) {
70            if (j > 0) configStr.append(", ");
71            configStr.append(configs[j].toString());
72        }
73    } else {
74        configStr = "(none)";
75    }
76    return configStr;
77}
78
79int StringPool::entry::compare(const entry& o) const {
80    // Strings with styles go first, to reduce the size of the styles array.
81    // We don't care about the relative order of these strings.
82    if (hasStyles) {
83        return o.hasStyles ? 0 : -1;
84    }
85    if (o.hasStyles) {
86        return 1;
87    }
88
89    // Sort unstyled strings by type, then by logical configuration.
90    int comp = configTypeName.compare(o.configTypeName);
91    if (comp != 0) {
92        return comp;
93    }
94    const size_t LHN = configs.size();
95    const size_t RHN = o.configs.size();
96    size_t i=0;
97    while (i < LHN && i < RHN) {
98        comp = configs[i].compareLogical(o.configs[i]);
99        if (comp != 0) {
100            return comp;
101        }
102        i++;
103    }
104    if (LHN < RHN) return -1;
105    else if (LHN > RHN) return 1;
106    return 0;
107}
108
109StringPool::StringPool(bool utf8) :
110        mUTF8(utf8), mValues(-1)
111{
112}
113
114ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
115        const String8* configTypeName, const ResTable_config* config)
116{
117    ssize_t res = add(value, false, configTypeName, config);
118    if (res >= 0) {
119        addStyleSpans(res, spans);
120    }
121    return res;
122}
123
124ssize_t StringPool::add(const String16& value,
125        bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
126{
127    ssize_t vidx = mValues.indexOfKey(value);
128    ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
129    ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
130    if (eidx < 0) {
131        eidx = mEntries.add(entry(value));
132        if (eidx < 0) {
133            fprintf(stderr, "Failure adding string %s\n", String8(value).string());
134            return eidx;
135        }
136    }
137
138    if (configTypeName != NULL) {
139        entry& ent = mEntries.editItemAt(eidx);
140        if (kIsDebug) {
141            printf("*** adding config type name %s, was %s\n",
142                    configTypeName->string(), ent.configTypeName.string());
143        }
144        if (ent.configTypeName.size() <= 0) {
145            ent.configTypeName = *configTypeName;
146        } else if (ent.configTypeName != *configTypeName) {
147            ent.configTypeName = " ";
148        }
149    }
150
151    if (config != NULL) {
152        // Add this to the set of configs associated with the string.
153        entry& ent = mEntries.editItemAt(eidx);
154        size_t addPos;
155        for (addPos=0; addPos<ent.configs.size(); addPos++) {
156            int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
157            if (cmp >= 0) {
158                if (cmp > 0) {
159                    if (kIsDebug) {
160                        printf("*** inserting config: %s\n", config->toString().string());
161                    }
162                    ent.configs.insertAt(*config, addPos);
163                }
164                break;
165            }
166        }
167        if (addPos >= ent.configs.size()) {
168            if (kIsDebug) {
169                printf("*** adding config: %s\n", config->toString().string());
170            }
171            ent.configs.add(*config);
172        }
173    }
174
175    const bool first = vidx < 0;
176    const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
177        mEntryStyleArray[pos].spans.size() : 0;
178    if (first || styled || !mergeDuplicates) {
179        pos = mEntryArray.add(eidx);
180        if (first) {
181            vidx = mValues.add(value, pos);
182        }
183        entry& ent = mEntries.editItemAt(eidx);
184        ent.indices.add(pos);
185    }
186
187    if (kIsDebug) {
188        printf("Adding string %s to pool: pos=%zd eidx=%zd vidx=%zd\n",
189                String8(value).string(), SSIZE(pos), SSIZE(eidx), SSIZE(vidx));
190    }
191
192    return pos;
193}
194
195status_t StringPool::addStyleSpan(size_t idx, const String16& name,
196                                  uint32_t start, uint32_t end)
197{
198    entry_style_span span;
199    span.name = name;
200    span.span.firstChar = start;
201    span.span.lastChar = end;
202    return addStyleSpan(idx, span);
203}
204
205status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
206{
207    const size_t N=spans.size();
208    for (size_t i=0; i<N; i++) {
209        status_t err = addStyleSpan(idx, spans[i]);
210        if (err != NO_ERROR) {
211            return err;
212        }
213    }
214    return NO_ERROR;
215}
216
217status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
218{
219    // Place blank entries in the span array up to this index.
220    while (mEntryStyleArray.size() <= idx) {
221        mEntryStyleArray.add();
222    }
223
224    entry_style& style = mEntryStyleArray.editItemAt(idx);
225    style.spans.add(span);
226    mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
227    return NO_ERROR;
228}
229
230StringPool::ConfigSorter::ConfigSorter(const StringPool& pool) : pool(pool)
231{
232}
233
234bool StringPool::ConfigSorter::operator()(size_t l, size_t r)
235{
236    const StringPool::entry& lhe = pool.mEntries[pool.mEntryArray[l]];
237    const StringPool::entry& rhe = pool.mEntries[pool.mEntryArray[r]];
238    return lhe.compare(rhe) < 0;
239}
240
241void StringPool::sortByConfig()
242{
243    LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
244
245    const size_t N = mEntryArray.size();
246
247    // This is a vector that starts out with a 1:1 mapping to entries
248    // in the array, which we will sort to come up with the desired order.
249    // At that point it maps from the new position in the array to the
250    // original position the entry appeared.
251    Vector<size_t> newPosToOriginalPos;
252    newPosToOriginalPos.setCapacity(N);
253    for (size_t i=0; i < N; i++) {
254        newPosToOriginalPos.add(i);
255    }
256
257    // Sort the array.
258    if (kIsDebug) {
259        printf("SORTING STRINGS BY CONFIGURATION...\n");
260    }
261    ConfigSorter sorter(*this);
262    std::sort(newPosToOriginalPos.begin(), newPosToOriginalPos.end(), sorter);
263    if (kIsDebug) {
264        printf("DONE SORTING STRINGS BY CONFIGURATION.\n");
265    }
266
267    // Create the reverse mapping from the original position in the array
268    // to the new position where it appears in the sorted array.  This is
269    // so that clients can re-map any positions they had previously stored.
270    mOriginalPosToNewPos = newPosToOriginalPos;
271    for (size_t i=0; i<N; i++) {
272        mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
273    }
274
275#if 0
276    SortedVector<entry> entries;
277
278    for (size_t i=0; i<N; i++) {
279        printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
280                mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
281        entries.add(mEntries[mEntryArray[i]]);
282    }
283
284    for (size_t i=0; i<entries.size(); i++) {
285        printf("Sorted config #%d: %s\n", i,
286                entries[i].makeConfigsString().string());
287    }
288#endif
289
290    // Now we rebuild the arrays.
291    Vector<entry> newEntries;
292    Vector<size_t> newEntryArray;
293    Vector<entry_style> newEntryStyleArray;
294    DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
295
296    for (size_t i=0; i<N; i++) {
297        // We are filling in new offset 'i'; oldI is where we can find it
298        // in the original data structure.
299        size_t oldI = newPosToOriginalPos[i];
300        // This is the actual entry associated with the old offset.
301        const entry& oldEnt = mEntries[mEntryArray[oldI]];
302        // This is the same entry the last time we added it to the
303        // new entry array, if any.
304        ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
305        size_t newOffset;
306        if (newIndexOfOffset < 0) {
307            // This is the first time we have seen the entry, so add
308            // it.
309            newOffset = newEntries.add(oldEnt);
310            newEntries.editItemAt(newOffset).indices.clear();
311        } else {
312            // We have seen this entry before, use the existing one
313            // instead of adding it again.
314            newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
315        }
316        // Update the indices to include this new position.
317        newEntries.editItemAt(newOffset).indices.add(i);
318        // And add the offset of the entry to the new entry array.
319        newEntryArray.add(newOffset);
320        // Add any old style to the new style array.
321        if (mEntryStyleArray.size() > 0) {
322            if (oldI < mEntryStyleArray.size()) {
323                newEntryStyleArray.add(mEntryStyleArray[oldI]);
324            } else {
325                newEntryStyleArray.add(entry_style());
326            }
327        }
328    }
329
330    // Now trim any entries at the end of the new style array that are
331    // not needed.
332    for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
333        const entry_style& style = newEntryStyleArray[i];
334        if (style.spans.size() > 0) {
335            // That's it.
336            break;
337        }
338        // This one is not needed; remove.
339        newEntryStyleArray.removeAt(i);
340    }
341
342    // All done, install the new data structures and upate mValues with
343    // the new positions.
344    mEntries = newEntries;
345    mEntryArray = newEntryArray;
346    mEntryStyleArray = newEntryStyleArray;
347    mValues.clear();
348    for (size_t i=0; i<mEntries.size(); i++) {
349        const entry& ent = mEntries[i];
350        mValues.add(ent.value, ent.indices[0]);
351    }
352
353#if 0
354    printf("FINAL SORTED STRING CONFIGS:\n");
355    for (size_t i=0; i<mEntries.size(); i++) {
356        const entry& ent = mEntries[i];
357        printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
358                String8(ent.value).string());
359    }
360#endif
361}
362
363sp<AaptFile> StringPool::createStringBlock()
364{
365    sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
366                                     String8());
367    status_t err = writeStringBlock(pool);
368    return err == NO_ERROR ? pool : NULL;
369}
370
371#define ENCODE_LENGTH(str, chrsz, strSize) \
372{ \
373    size_t maxMask = 1 << ((chrsz*8)-1); \
374    size_t maxSize = maxMask-1; \
375    if (strSize > maxSize) { \
376        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
377    } \
378    *str++ = strSize; \
379}
380
381status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
382{
383    // Allow appending.  Sorry this is a little wacky.
384    if (pool->getSize() > 0) {
385        sp<AaptFile> block = createStringBlock();
386        if (block == NULL) {
387            return UNKNOWN_ERROR;
388        }
389        ssize_t res = pool->writeData(block->getData(), block->getSize());
390        return (res >= 0) ? (status_t)NO_ERROR : res;
391    }
392
393    // First we need to add all style span names to the string pool.
394    // We do this now (instead of when the span is added) so that these
395    // will appear at the end of the pool, not disrupting the order
396    // our client placed their own strings in it.
397
398    const size_t STYLES = mEntryStyleArray.size();
399    size_t i;
400
401    for (i=0; i<STYLES; i++) {
402        entry_style& style = mEntryStyleArray.editItemAt(i);
403        const size_t N = style.spans.size();
404        for (size_t i=0; i<N; i++) {
405            entry_style_span& span = style.spans.editItemAt(i);
406            ssize_t idx = add(span.name, true);
407            if (idx < 0) {
408                fprintf(stderr, "Error adding span for style tag '%s'\n",
409                        String8(span.name).string());
410                return idx;
411            }
412            span.span.name.index = (uint32_t)idx;
413        }
414    }
415
416    const size_t ENTRIES = mEntryArray.size();
417
418    // Now build the pool of unique strings.
419
420    const size_t STRINGS = mEntries.size();
421    const size_t preSize = sizeof(ResStringPool_header)
422                         + (sizeof(uint32_t)*ENTRIES)
423                         + (sizeof(uint32_t)*STYLES);
424    if (pool->editData(preSize) == NULL) {
425        fprintf(stderr, "ERROR: Out of memory for string pool\n");
426        return NO_MEMORY;
427    }
428
429    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
430
431    size_t strPos = 0;
432    for (i=0; i<STRINGS; i++) {
433        entry& ent = mEntries.editItemAt(i);
434        const size_t strSize = (ent.value.size());
435        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
436            charSize*2 : charSize;
437
438        String8 encStr;
439        if (mUTF8) {
440            encStr = String8(ent.value);
441        }
442
443        const size_t encSize = mUTF8 ? encStr.size() : 0;
444        const size_t encLenSize = mUTF8 ?
445            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
446                charSize*2 : charSize) : 0;
447
448        ent.offset = strPos;
449
450        const size_t totalSize = lenSize + encLenSize +
451            ((mUTF8 ? encSize : strSize)+1)*charSize;
452
453        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
454        if (dat == NULL) {
455            fprintf(stderr, "ERROR: Out of memory for string pool\n");
456            return NO_MEMORY;
457        }
458        dat = (uint8_t*)dat + preSize + strPos;
459        if (mUTF8) {
460            uint8_t* strings = (uint8_t*)dat;
461
462            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
463
464            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
465
466            strncpy((char*)strings, encStr, encSize+1);
467        } else {
468            char16_t* strings = (char16_t*)dat;
469
470            ENCODE_LENGTH(strings, sizeof(char16_t), strSize)
471
472            strcpy16_htod(strings, ent.value);
473        }
474
475        strPos += totalSize;
476    }
477
478    // Pad ending string position up to a uint32_t boundary.
479
480    if (strPos&0x3) {
481        size_t padPos = ((strPos+3)&~0x3);
482        uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
483        if (dat == NULL) {
484            fprintf(stderr, "ERROR: Out of memory padding string pool\n");
485            return NO_MEMORY;
486        }
487        memset(dat+preSize+strPos, 0, padPos-strPos);
488        strPos = padPos;
489    }
490
491    // Build the pool of style spans.
492
493    size_t styPos = strPos;
494    for (i=0; i<STYLES; i++) {
495        entry_style& ent = mEntryStyleArray.editItemAt(i);
496        const size_t N = ent.spans.size();
497        const size_t totalSize = (N*sizeof(ResStringPool_span))
498                               + sizeof(ResStringPool_ref);
499
500        ent.offset = styPos-strPos;
501        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
502        if (dat == NULL) {
503            fprintf(stderr, "ERROR: Out of memory for string styles\n");
504            return NO_MEMORY;
505        }
506        ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
507        for (size_t i=0; i<N; i++) {
508            span->name.index = htodl(ent.spans[i].span.name.index);
509            span->firstChar = htodl(ent.spans[i].span.firstChar);
510            span->lastChar = htodl(ent.spans[i].span.lastChar);
511            span++;
512        }
513        span->name.index = htodl(ResStringPool_span::END);
514
515        styPos += totalSize;
516    }
517
518    if (STYLES > 0) {
519        // Add full terminator at the end (when reading we validate that
520        // the end of the pool is fully terminated to simplify error
521        // checking).
522        size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
523        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
524        if (dat == NULL) {
525            fprintf(stderr, "ERROR: Out of memory for string styles\n");
526            return NO_MEMORY;
527        }
528        uint32_t* p = (uint32_t*)(dat+preSize+styPos);
529        while (extra > 0) {
530            *p++ = htodl(ResStringPool_span::END);
531            extra -= sizeof(uint32_t);
532        }
533        styPos += extra;
534    }
535
536    // Write header.
537
538    ResStringPool_header* header =
539        (ResStringPool_header*)pool->padData(sizeof(uint32_t));
540    if (header == NULL) {
541        fprintf(stderr, "ERROR: Out of memory for string pool\n");
542        return NO_MEMORY;
543    }
544    memset(header, 0, sizeof(*header));
545    header->header.type = htods(RES_STRING_POOL_TYPE);
546    header->header.headerSize = htods(sizeof(*header));
547    header->header.size = htodl(pool->getSize());
548    header->stringCount = htodl(ENTRIES);
549    header->styleCount = htodl(STYLES);
550    if (mUTF8) {
551        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
552    }
553    header->stringsStart = htodl(preSize);
554    header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
555
556    // Write string index array.
557
558    uint32_t* index = (uint32_t*)(header+1);
559    for (i=0; i<ENTRIES; i++) {
560        entry& ent = mEntries.editItemAt(mEntryArray[i]);
561        *index++ = htodl(ent.offset);
562        if (kIsDebug) {
563            printf("Writing entry #%zu: \"%s\" ent=%zu off=%zu\n",
564                    i,
565                    String8(ent.value).string(),
566                    mEntryArray[i],
567                    ent.offset);
568        }
569    }
570
571    // Write style index array.
572
573    for (i=0; i<STYLES; i++) {
574        *index++ = htodl(mEntryStyleArray[i].offset);
575    }
576
577    return NO_ERROR;
578}
579
580ssize_t StringPool::offsetForString(const String16& val) const
581{
582    const Vector<size_t>* indices = offsetsForString(val);
583    ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
584    if (kIsDebug) {
585        printf("Offset for string %s: %zd (%s)\n", String8(val).string(), SSIZE(res),
586                res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8());
587    }
588    return res;
589}
590
591const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
592{
593    ssize_t pos = mValues.valueFor(val);
594    if (pos < 0) {
595        return NULL;
596    }
597    return &mEntries[mEntryArray[pos]].indices;
598}
599