StringPool.cpp revision 4bf58108d442b37ab4adf5ce3a4ecd63472ce254
1//
2// Copyright 2006 The Android Open Source Project
3//
4// Build resource files from raw assets.
5//
6
7#include "StringPool.h"
8#include "ResourceTable.h"
9
10#include <utils/ByteOrder.h>
11#include <utils/SortedVector.h>
12#include "qsort_r_compat.h"
13
14#if HAVE_PRINTF_ZD
15#  define ZD "%zd"
16#  define ZD_TYPE ssize_t
17#else
18#  define ZD "%ld"
19#  define ZD_TYPE long
20#endif
21
22#define NOISY(x) //x
23
24#if __cplusplus >= 201103L
25void strcpy16_htod(char16_t* dst, const char16_t* src)
26{
27    while (*src) {
28        char16_t s = htods(*src);
29        *dst++ = s;
30        src++;
31    }
32    *dst = 0;
33}
34#endif
35
36void strcpy16_htod(uint16_t* dst, const char16_t* src)
37{
38    while (*src) {
39        uint16_t s = htods(static_cast<uint16_t>(*src));
40        *dst++ = s;
41        src++;
42    }
43    *dst = 0;
44}
45
46void printStringPool(const ResStringPool* pool)
47{
48    if (pool->getError() == NO_INIT) {
49        printf("String pool is unitialized.\n");
50        return;
51    } else if (pool->getError() != NO_ERROR) {
52        printf("String pool is corrupt/invalid.\n");
53        return;
54    }
55
56    SortedVector<const void*> uniqueStrings;
57    const size_t N = pool->size();
58    for (size_t i=0; i<N; i++) {
59        size_t len;
60        if (pool->isUTF8()) {
61            uniqueStrings.add(pool->string8At(i, &len));
62        } else {
63            uniqueStrings.add(pool->stringAt(i, &len));
64        }
65    }
66
67    printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
68            ZD " styles using " ZD " bytes:\n",
69            (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
70            pool->isSorted() ? "sorted" : "non-sorted",
71            (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
72
73    const size_t NS = pool->size();
74    for (size_t s=0; s<NS; s++) {
75        String8 str = pool->string8ObjectAt(s);
76        printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
77    }
78}
79
80String8 StringPool::entry::makeConfigsString() const {
81    String8 configStr(configTypeName);
82    if (configStr.size() > 0) configStr.append(" ");
83    if (configs.size() > 0) {
84        for (size_t j=0; j<configs.size(); j++) {
85            if (j > 0) configStr.append(", ");
86            configStr.append(configs[j].toString());
87        }
88    } else {
89        configStr = "(none)";
90    }
91    return configStr;
92}
93
94int StringPool::entry::compare(const entry& o) const {
95    // Strings with styles go first, to reduce the size of the styles array.
96    // We don't care about the relative order of these strings.
97    if (hasStyles) {
98        return o.hasStyles ? 0 : -1;
99    }
100    if (o.hasStyles) {
101        return 1;
102    }
103
104    // Sort unstyled strings by type, then by logical configuration.
105    int comp = configTypeName.compare(o.configTypeName);
106    if (comp != 0) {
107        return comp;
108    }
109    const size_t LHN = configs.size();
110    const size_t RHN = o.configs.size();
111    size_t i=0;
112    while (i < LHN && i < RHN) {
113        comp = configs[i].compareLogical(o.configs[i]);
114        if (comp != 0) {
115            return comp;
116        }
117        i++;
118    }
119    if (LHN < RHN) return -1;
120    else if (LHN > RHN) return 1;
121    return 0;
122}
123
124StringPool::StringPool(bool utf8) :
125        mUTF8(utf8), mValues(-1)
126{
127}
128
129ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
130        const String8* configTypeName, const ResTable_config* config)
131{
132    ssize_t res = add(value, false, configTypeName, config);
133    if (res >= 0) {
134        addStyleSpans(res, spans);
135    }
136    return res;
137}
138
139ssize_t StringPool::add(const String16& value,
140        bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
141{
142    ssize_t vidx = mValues.indexOfKey(value);
143    ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
144    ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
145    if (eidx < 0) {
146        eidx = mEntries.add(entry(value));
147        if (eidx < 0) {
148            fprintf(stderr, "Failure adding string %s\n", String8(value).string());
149            return eidx;
150        }
151    }
152
153    if (configTypeName != NULL) {
154        entry& ent = mEntries.editItemAt(eidx);
155        NOISY(printf("*** adding config type name %s, was %s\n",
156                configTypeName->string(), ent.configTypeName.string()));
157        if (ent.configTypeName.size() <= 0) {
158            ent.configTypeName = *configTypeName;
159        } else if (ent.configTypeName != *configTypeName) {
160            ent.configTypeName = " ";
161        }
162    }
163
164    if (config != NULL) {
165        // Add this to the set of configs associated with the string.
166        entry& ent = mEntries.editItemAt(eidx);
167        size_t addPos;
168        for (addPos=0; addPos<ent.configs.size(); addPos++) {
169            int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
170            if (cmp >= 0) {
171                if (cmp > 0) {
172                    NOISY(printf("*** inserting config: %s\n", config->toString().string()));
173                    ent.configs.insertAt(*config, addPos);
174                }
175                break;
176            }
177        }
178        if (addPos >= ent.configs.size()) {
179            NOISY(printf("*** adding config: %s\n", config->toString().string()));
180            ent.configs.add(*config);
181        }
182    }
183
184    const bool first = vidx < 0;
185    const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
186        mEntryStyleArray[pos].spans.size() : 0;
187    if (first || styled || !mergeDuplicates) {
188        pos = mEntryArray.add(eidx);
189        if (first) {
190            vidx = mValues.add(value, pos);
191        }
192        entry& ent = mEntries.editItemAt(eidx);
193        ent.indices.add(pos);
194    }
195
196    NOISY(printf("Adding string %s to pool: pos=%d eidx=%d vidx=%d\n",
197            String8(value).string(), pos, eidx, vidx));
198
199    return pos;
200}
201
202status_t StringPool::addStyleSpan(size_t idx, const String16& name,
203                                  uint32_t start, uint32_t end)
204{
205    entry_style_span span;
206    span.name = name;
207    span.span.firstChar = start;
208    span.span.lastChar = end;
209    return addStyleSpan(idx, span);
210}
211
212status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
213{
214    const size_t N=spans.size();
215    for (size_t i=0; i<N; i++) {
216        status_t err = addStyleSpan(idx, spans[i]);
217        if (err != NO_ERROR) {
218            return err;
219        }
220    }
221    return NO_ERROR;
222}
223
224status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
225{
226    // Place blank entries in the span array up to this index.
227    while (mEntryStyleArray.size() <= idx) {
228        mEntryStyleArray.add();
229    }
230
231    entry_style& style = mEntryStyleArray.editItemAt(idx);
232    style.spans.add(span);
233    mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
234    return NO_ERROR;
235}
236
237int StringPool::config_sort(void* state, const void* lhs, const void* rhs)
238{
239    StringPool* pool = (StringPool*)state;
240    const entry& lhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(lhs)]];
241    const entry& rhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(rhs)]];
242    return lhe.compare(rhe);
243}
244
245void StringPool::sortByConfig()
246{
247    LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
248
249    const size_t N = mEntryArray.size();
250
251    // This is a vector that starts out with a 1:1 mapping to entries
252    // in the array, which we will sort to come up with the desired order.
253    // At that point it maps from the new position in the array to the
254    // original position the entry appeared.
255    Vector<size_t> newPosToOriginalPos;
256    newPosToOriginalPos.setCapacity(N);
257    for (size_t i=0; i < N; i++) {
258        newPosToOriginalPos.add(i);
259    }
260
261    // Sort the array.
262    NOISY(printf("SORTING STRINGS BY CONFIGURATION...\n"));
263    // Vector::sort uses insertion sort, which is very slow for this data set.
264    // Use quicksort instead because we don't need a stable sort here.
265    qsort_r_compat(newPosToOriginalPos.editArray(), N, sizeof(size_t), this, config_sort);
266    //newPosToOriginalPos.sort(config_sort, this);
267    NOISY(printf("DONE SORTING STRINGS BY CONFIGURATION.\n"));
268
269    // Create the reverse mapping from the original position in the array
270    // to the new position where it appears in the sorted array.  This is
271    // so that clients can re-map any positions they had previously stored.
272    mOriginalPosToNewPos = newPosToOriginalPos;
273    for (size_t i=0; i<N; i++) {
274        mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
275    }
276
277#if 0
278    SortedVector<entry> entries;
279
280    for (size_t i=0; i<N; i++) {
281        printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
282                mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
283        entries.add(mEntries[mEntryArray[i]]);
284    }
285
286    for (size_t i=0; i<entries.size(); i++) {
287        printf("Sorted config #%d: %s\n", i,
288                entries[i].makeConfigsString().string());
289    }
290#endif
291
292    // Now we rebuild the arrays.
293    Vector<entry> newEntries;
294    Vector<size_t> newEntryArray;
295    Vector<entry_style> newEntryStyleArray;
296    DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
297
298    for (size_t i=0; i<N; i++) {
299        // We are filling in new offset 'i'; oldI is where we can find it
300        // in the original data structure.
301        size_t oldI = newPosToOriginalPos[i];
302        // This is the actual entry associated with the old offset.
303        const entry& oldEnt = mEntries[mEntryArray[oldI]];
304        // This is the same entry the last time we added it to the
305        // new entry array, if any.
306        ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
307        size_t newOffset;
308        if (newIndexOfOffset < 0) {
309            // This is the first time we have seen the entry, so add
310            // it.
311            newOffset = newEntries.add(oldEnt);
312            newEntries.editItemAt(newOffset).indices.clear();
313        } else {
314            // We have seen this entry before, use the existing one
315            // instead of adding it again.
316            newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
317        }
318        // Update the indices to include this new position.
319        newEntries.editItemAt(newOffset).indices.add(i);
320        // And add the offset of the entry to the new entry array.
321        newEntryArray.add(newOffset);
322        // Add any old style to the new style array.
323        if (mEntryStyleArray.size() > 0) {
324            if (oldI < mEntryStyleArray.size()) {
325                newEntryStyleArray.add(mEntryStyleArray[oldI]);
326            } else {
327                newEntryStyleArray.add(entry_style());
328            }
329        }
330    }
331
332    // Now trim any entries at the end of the new style array that are
333    // not needed.
334    for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
335        const entry_style& style = newEntryStyleArray[i];
336        if (style.spans.size() > 0) {
337            // That's it.
338            break;
339        }
340        // This one is not needed; remove.
341        newEntryStyleArray.removeAt(i);
342    }
343
344    // All done, install the new data structures and upate mValues with
345    // the new positions.
346    mEntries = newEntries;
347    mEntryArray = newEntryArray;
348    mEntryStyleArray = newEntryStyleArray;
349    mValues.clear();
350    for (size_t i=0; i<mEntries.size(); i++) {
351        const entry& ent = mEntries[i];
352        mValues.add(ent.value, ent.indices[0]);
353    }
354
355#if 0
356    printf("FINAL SORTED STRING CONFIGS:\n");
357    for (size_t i=0; i<mEntries.size(); i++) {
358        const entry& ent = mEntries[i];
359        printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
360                String8(ent.value).string());
361    }
362#endif
363}
364
365sp<AaptFile> StringPool::createStringBlock()
366{
367    sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
368                                     String8());
369    status_t err = writeStringBlock(pool);
370    return err == NO_ERROR ? pool : NULL;
371}
372
373#define ENCODE_LENGTH(str, chrsz, strSize) \
374{ \
375    size_t maxMask = 1 << ((chrsz*8)-1); \
376    size_t maxSize = maxMask-1; \
377    if (strSize > maxSize) { \
378        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
379    } \
380    *str++ = strSize; \
381}
382
383status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
384{
385    // Allow appending.  Sorry this is a little wacky.
386    if (pool->getSize() > 0) {
387        sp<AaptFile> block = createStringBlock();
388        if (block == NULL) {
389            return UNKNOWN_ERROR;
390        }
391        ssize_t res = pool->writeData(block->getData(), block->getSize());
392        return (res >= 0) ? (status_t)NO_ERROR : res;
393    }
394
395    // First we need to add all style span names to the string pool.
396    // We do this now (instead of when the span is added) so that these
397    // will appear at the end of the pool, not disrupting the order
398    // our client placed their own strings in it.
399
400    const size_t STYLES = mEntryStyleArray.size();
401    size_t i;
402
403    for (i=0; i<STYLES; i++) {
404        entry_style& style = mEntryStyleArray.editItemAt(i);
405        const size_t N = style.spans.size();
406        for (size_t i=0; i<N; i++) {
407            entry_style_span& span = style.spans.editItemAt(i);
408            ssize_t idx = add(span.name, true);
409            if (idx < 0) {
410                fprintf(stderr, "Error adding span for style tag '%s'\n",
411                        String8(span.name).string());
412                return idx;
413            }
414            span.span.name.index = (uint32_t)idx;
415        }
416    }
417
418    const size_t ENTRIES = mEntryArray.size();
419
420    // Now build the pool of unique strings.
421
422    const size_t STRINGS = mEntries.size();
423    const size_t preSize = sizeof(ResStringPool_header)
424                         + (sizeof(uint32_t)*ENTRIES)
425                         + (sizeof(uint32_t)*STYLES);
426    if (pool->editData(preSize) == NULL) {
427        fprintf(stderr, "ERROR: Out of memory for string pool\n");
428        return NO_MEMORY;
429    }
430
431    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(uint16_t);
432
433    size_t strPos = 0;
434    for (i=0; i<STRINGS; i++) {
435        entry& ent = mEntries.editItemAt(i);
436        const size_t strSize = (ent.value.size());
437        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
438            charSize*2 : charSize;
439
440        String8 encStr;
441        if (mUTF8) {
442            encStr = String8(ent.value);
443        }
444
445        const size_t encSize = mUTF8 ? encStr.size() : 0;
446        const size_t encLenSize = mUTF8 ?
447            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
448                charSize*2 : charSize) : 0;
449
450        ent.offset = strPos;
451
452        const size_t totalSize = lenSize + encLenSize +
453            ((mUTF8 ? encSize : strSize)+1)*charSize;
454
455        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
456        if (dat == NULL) {
457            fprintf(stderr, "ERROR: Out of memory for string pool\n");
458            return NO_MEMORY;
459        }
460        dat = (uint8_t*)dat + preSize + strPos;
461        if (mUTF8) {
462            uint8_t* strings = (uint8_t*)dat;
463
464            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
465
466            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
467
468            strncpy((char*)strings, encStr, encSize+1);
469        } else {
470            uint16_t* strings = (uint16_t*)dat;
471
472            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
473
474            strcpy16_htod(strings, ent.value);
475        }
476
477        strPos += totalSize;
478    }
479
480    // Pad ending string position up to a uint32_t boundary.
481
482    if (strPos&0x3) {
483        size_t padPos = ((strPos+3)&~0x3);
484        uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
485        if (dat == NULL) {
486            fprintf(stderr, "ERROR: Out of memory padding string pool\n");
487            return NO_MEMORY;
488        }
489        memset(dat+preSize+strPos, 0, padPos-strPos);
490        strPos = padPos;
491    }
492
493    // Build the pool of style spans.
494
495    size_t styPos = strPos;
496    for (i=0; i<STYLES; i++) {
497        entry_style& ent = mEntryStyleArray.editItemAt(i);
498        const size_t N = ent.spans.size();
499        const size_t totalSize = (N*sizeof(ResStringPool_span))
500                               + sizeof(ResStringPool_ref);
501
502        ent.offset = styPos-strPos;
503        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
504        if (dat == NULL) {
505            fprintf(stderr, "ERROR: Out of memory for string styles\n");
506            return NO_MEMORY;
507        }
508        ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
509        for (size_t i=0; i<N; i++) {
510            span->name.index = htodl(ent.spans[i].span.name.index);
511            span->firstChar = htodl(ent.spans[i].span.firstChar);
512            span->lastChar = htodl(ent.spans[i].span.lastChar);
513            span++;
514        }
515        span->name.index = htodl(ResStringPool_span::END);
516
517        styPos += totalSize;
518    }
519
520    if (STYLES > 0) {
521        // Add full terminator at the end (when reading we validate that
522        // the end of the pool is fully terminated to simplify error
523        // checking).
524        size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
525        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
526        if (dat == NULL) {
527            fprintf(stderr, "ERROR: Out of memory for string styles\n");
528            return NO_MEMORY;
529        }
530        uint32_t* p = (uint32_t*)(dat+preSize+styPos);
531        while (extra > 0) {
532            *p++ = htodl(ResStringPool_span::END);
533            extra -= sizeof(uint32_t);
534        }
535        styPos += extra;
536    }
537
538    // Write header.
539
540    ResStringPool_header* header =
541        (ResStringPool_header*)pool->padData(sizeof(uint32_t));
542    if (header == NULL) {
543        fprintf(stderr, "ERROR: Out of memory for string pool\n");
544        return NO_MEMORY;
545    }
546    memset(header, 0, sizeof(*header));
547    header->header.type = htods(RES_STRING_POOL_TYPE);
548    header->header.headerSize = htods(sizeof(*header));
549    header->header.size = htodl(pool->getSize());
550    header->stringCount = htodl(ENTRIES);
551    header->styleCount = htodl(STYLES);
552    if (mUTF8) {
553        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
554    }
555    header->stringsStart = htodl(preSize);
556    header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
557
558    // Write string index array.
559
560    uint32_t* index = (uint32_t*)(header+1);
561    for (i=0; i<ENTRIES; i++) {
562        entry& ent = mEntries.editItemAt(mEntryArray[i]);
563        *index++ = htodl(ent.offset);
564        NOISY(printf("Writing entry #%d: \"%s\" ent=%d off=%d\n", i,
565                String8(ent.value).string(),
566                mEntryArray[i], ent.offset));
567    }
568
569    // Write style index array.
570
571    for (i=0; i<STYLES; i++) {
572        *index++ = htodl(mEntryStyleArray[i].offset);
573    }
574
575    return NO_ERROR;
576}
577
578ssize_t StringPool::offsetForString(const String16& val) const
579{
580    const Vector<size_t>* indices = offsetsForString(val);
581    ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
582    NOISY(printf("Offset for string %s: %d (%s)\n", String8(val).string(), res,
583            res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8()));
584    return res;
585}
586
587const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
588{
589    ssize_t pos = mValues.valueFor(val);
590    if (pos < 0) {
591        return NULL;
592    }
593    return &mEntries[mEntryArray[pos]].indices;
594}
595