StringPool.cpp revision db6e67d8ec081f43bcf14155741e268a38915719
1//
2// Copyright 2006 The Android Open Source Project
3//
4// Build resource files from raw assets.
5//
6
7#include "StringPool.h"
8#include "ResourceTable.h"
9
10#include <utils/ByteOrder.h>
11#include <utils/SortedVector.h>
12
13#if HAVE_PRINTF_ZD
14#  define ZD "%zd"
15#  define ZD_TYPE ssize_t
16#else
17#  define ZD "%ld"
18#  define ZD_TYPE long
19#endif
20
21#define NOISY(x) //x
22
23void strcpy16_htod(uint16_t* dst, const uint16_t* src)
24{
25    while (*src) {
26        char16_t s = htods(*src);
27        *dst++ = s;
28        src++;
29    }
30    *dst = 0;
31}
32
33void printStringPool(const ResStringPool* pool)
34{
35    SortedVector<const void*> uniqueStrings;
36    const size_t N = pool->size();
37    for (size_t i=0; i<N; i++) {
38        size_t len;
39        if (pool->isUTF8()) {
40            uniqueStrings.add(pool->string8At(i, &len));
41        } else {
42            uniqueStrings.add(pool->stringAt(i, &len));
43        }
44    }
45
46    printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
47            ZD " styles using " ZD " bytes:\n",
48            (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
49            pool->isSorted() ? "sorted" : "non-sorted",
50            (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
51
52    const size_t NS = pool->size();
53    for (size_t s=0; s<NS; s++) {
54        String8 str = pool->string8ObjectAt(s);
55        printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
56    }
57}
58
59String8 StringPool::entry::makeConfigsString() const {
60    String8 configStr(configTypeName);
61    if (configStr.size() > 0) configStr.append(" ");
62    if (configs.size() > 0) {
63        for (size_t j=0; j<configs.size(); j++) {
64            if (j > 0) configStr.append(", ");
65            configStr.append(configs[j].toString());
66        }
67    } else {
68        configStr = "(none)";
69    }
70    return configStr;
71}
72
73int StringPool::entry::compare(const entry& o) const {
74    // Strings with styles go first, to reduce the size of the
75    // styles array.
76    if (hasStyles) {
77        return o.hasStyles ? 0 : -1;
78    }
79    if (o.hasStyles) {
80        return 1;
81    }
82    int comp = configTypeName.compare(o.configTypeName);
83    if (comp != 0) {
84        return comp;
85    }
86    const size_t LHN = configs.size();
87    const size_t RHN = o.configs.size();
88    size_t i=0;
89    while (i < LHN && i < RHN) {
90        comp = configs[i].compareLogical(o.configs[i]);
91        if (comp != 0) {
92            return comp;
93        }
94        i++;
95    }
96    if (LHN < RHN) return -1;
97    else if (LHN > RHN) return 1;
98    return 0;
99}
100
101StringPool::StringPool(bool sorted, bool utf8)
102    : mSorted(sorted), mUTF8(utf8), mValues(-1), mIdents(-1)
103{
104}
105
106ssize_t StringPool::add(const String16& value, bool mergeDuplicates,
107        const String8* configTypeName, const ResTable_config* config)
108{
109    return add(String16(), value, mergeDuplicates, configTypeName, config);
110}
111
112ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
113        const String8* configTypeName, const ResTable_config* config)
114{
115    ssize_t res = add(String16(), value, false, configTypeName, config);
116    if (res >= 0) {
117        addStyleSpans(res, spans);
118    }
119    return res;
120}
121
122ssize_t StringPool::add(const String16& ident, const String16& value,
123        bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
124{
125    if (ident.size() > 0) {
126        ssize_t idx = mIdents.valueFor(ident);
127        if (idx >= 0) {
128            fprintf(stderr, "ERROR: Duplicate string identifier %s\n",
129                    String8(mEntries[idx].value).string());
130            return UNKNOWN_ERROR;
131        }
132    }
133
134    ssize_t vidx = mValues.indexOfKey(value);
135    ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
136    ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
137    if (eidx < 0) {
138        eidx = mEntries.add(entry(value));
139        if (eidx < 0) {
140            fprintf(stderr, "Failure adding string %s\n", String8(value).string());
141            return eidx;
142        }
143    }
144
145    if (configTypeName != NULL) {
146        entry& ent = mEntries.editItemAt(eidx);
147        NOISY(printf("*** adding config type name %s, was %s\n",
148                configTypeName->string(), ent.configTypeName.string()));
149        if (ent.configTypeName.size() <= 0) {
150            ent.configTypeName = *configTypeName;
151        } else if (ent.configTypeName != *configTypeName) {
152            ent.configTypeName = " ";
153        }
154    }
155
156    if (config != NULL) {
157        // Add this to the set of configs associated with the string.
158        entry& ent = mEntries.editItemAt(eidx);
159        size_t addPos;
160        for (addPos=0; addPos<ent.configs.size(); addPos++) {
161            int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
162            if (cmp >= 0) {
163                if (cmp > 0) {
164                    NOISY(printf("*** inserting config: %s\n", config->toString().string()));
165                    ent.configs.insertAt(*config, addPos);
166                }
167                break;
168            }
169        }
170        if (addPos >= ent.configs.size()) {
171            NOISY(printf("*** adding config: %s\n", config->toString().string()));
172            ent.configs.add(*config);
173        }
174    }
175
176    const bool first = vidx < 0;
177    const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
178        mEntryStyleArray[pos].spans.size() : 0;
179    if (first || styled || !mergeDuplicates) {
180        pos = mEntryArray.add(eidx);
181        if (first) {
182            vidx = mValues.add(value, pos);
183        }
184        if (!mSorted) {
185            entry& ent = mEntries.editItemAt(eidx);
186            ent.indices.add(pos);
187        }
188    }
189
190    if (ident.size() > 0) {
191        mIdents.add(ident, vidx);
192    }
193
194    NOISY(printf("Adding string %s to pool: pos=%d eidx=%d vidx=%d\n",
195            String8(value).string(), pos, eidx, vidx));
196
197    return pos;
198}
199
200status_t StringPool::addStyleSpan(size_t idx, const String16& name,
201                                  uint32_t start, uint32_t end)
202{
203    entry_style_span span;
204    span.name = name;
205    span.span.firstChar = start;
206    span.span.lastChar = end;
207    return addStyleSpan(idx, span);
208}
209
210status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
211{
212    const size_t N=spans.size();
213    for (size_t i=0; i<N; i++) {
214        status_t err = addStyleSpan(idx, spans[i]);
215        if (err != NO_ERROR) {
216            return err;
217        }
218    }
219    return NO_ERROR;
220}
221
222status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
223{
224    LOG_ALWAYS_FATAL_IF(mSorted, "Can't use styles with sorted string pools.");
225
226    // Place blank entries in the span array up to this index.
227    while (mEntryStyleArray.size() <= idx) {
228        mEntryStyleArray.add();
229    }
230
231    entry_style& style = mEntryStyleArray.editItemAt(idx);
232    style.spans.add(span);
233    mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
234    return NO_ERROR;
235}
236
237size_t StringPool::size() const
238{
239    return mSorted ? mValues.size() : mEntryArray.size();
240}
241
242const StringPool::entry& StringPool::entryAt(size_t idx) const
243{
244    if (!mSorted) {
245        return mEntries[mEntryArray[idx]];
246    } else {
247        return mEntries[mEntryArray[mValues.valueAt(idx)]];
248    }
249}
250
251size_t StringPool::countIdentifiers() const
252{
253    return mIdents.size();
254}
255
256int StringPool::config_sort(const size_t* lhs, const size_t* rhs, void* state)
257{
258    StringPool* pool = (StringPool*)state;
259    const entry& lhe = pool->mEntries[pool->mEntryArray[*lhs]];
260    const entry& rhe = pool->mEntries[pool->mEntryArray[*rhs]];
261    return lhe.compare(rhe);
262}
263
264void StringPool::sortByConfig()
265{
266    LOG_ALWAYS_FATAL_IF(mSorted, "Can't sort string pool containing identifiers.");
267    LOG_ALWAYS_FATAL_IF(mIdents.size() > 0, "Can't sort string pool containing identifiers.");
268    LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
269
270    const size_t N = mEntryArray.size();
271
272    // This is a vector that starts out with a 1:1 mapping to entries
273    // in the array, which we will sort to come up with the desired order.
274    // At that point it maps from the new position in the array to the
275    // original position the entry appeared.
276    Vector<size_t> newPosToOriginalPos;
277    for (size_t i=0; i<mEntryArray.size(); i++) {
278        newPosToOriginalPos.add(i);
279    }
280
281    // Sort the array.
282    NOISY(printf("SORTING STRINGS BY CONFIGURATION...\n"));
283    newPosToOriginalPos.sort(config_sort, this);
284    NOISY(printf("DONE SORTING STRINGS BY CONFIGURATION.\n"));
285
286    // Create the reverse mapping from the original position in the array
287    // to the new position where it appears in the sorted array.  This is
288    // so that clients can re-map any positions they had previously stored.
289    mOriginalPosToNewPos = newPosToOriginalPos;
290    for (size_t i=0; i<N; i++) {
291        mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
292    }
293
294#if 0
295    SortedVector<entry> entries;
296
297    for (size_t i=0; i<N; i++) {
298        printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
299                mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
300        entries.add(mEntries[mEntryArray[i]]);
301    }
302
303    for (size_t i=0; i<entries.size(); i++) {
304        printf("Sorted config #%d: %s\n", i,
305                entries[i].makeConfigsString().string());
306    }
307#endif
308
309    // Now we rebuild the arrays.
310    Vector<entry> newEntries;
311    Vector<size_t> newEntryArray;
312    Vector<entry_style> newEntryStyleArray;
313    DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
314
315    for (size_t i=0; i<N; i++) {
316        // We are filling in new offset 'i'; oldI is where we can find it
317        // in the original data structure.
318        size_t oldI = newPosToOriginalPos[i];
319        // This is the actual entry associated with the old offset.
320        const entry& oldEnt = mEntries[mEntryArray[oldI]];
321        // This is the same entry the last time we added it to the
322        // new entry array, if any.
323        ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
324        size_t newOffset;
325        if (newIndexOfOffset < 0) {
326            // This is the first time we have seen the entry, so add
327            // it.
328            newOffset = newEntries.add(oldEnt);
329            newEntries.editItemAt(newOffset).indices.clear();
330        } else {
331            // We have seen this entry before, use the existing one
332            // instead of adding it again.
333            newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
334        }
335        // Update the indices to include this new position.
336        newEntries.editItemAt(newOffset).indices.add(i);
337        // And add the offset of the entry to the new entry array.
338        newEntryArray.add(newOffset);
339        // Add any old style to the new style array.
340        if (mEntryStyleArray.size() > 0) {
341            if (oldI < mEntryStyleArray.size()) {
342                newEntryStyleArray.add(mEntryStyleArray[oldI]);
343            } else {
344                newEntryStyleArray.add(entry_style());
345            }
346        }
347    }
348
349    // Now trim any entries at the end of the new style array that are
350    // not needed.
351    for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
352        const entry_style& style = newEntryStyleArray[i];
353        if (style.spans.size() > 0) {
354            // That's it.
355            break;
356        }
357        // This one is not needed; remove.
358        newEntryStyleArray.removeAt(i);
359    }
360
361    // All done, install the new data structures and upate mValues with
362    // the new positions.
363    mEntries = newEntries;
364    mEntryArray = newEntryArray;
365    mEntryStyleArray = newEntryStyleArray;
366    mValues.clear();
367    for (size_t i=0; i<mEntries.size(); i++) {
368        const entry& ent = mEntries[i];
369        mValues.add(ent.value, ent.indices[0]);
370    }
371
372#if 0
373    printf("FINAL SORTED STRING CONFIGS:\n");
374    for (size_t i=0; i<mEntries.size(); i++) {
375        const entry& ent = mEntries[i];
376        printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
377                String8(ent.value).string());
378    }
379#endif
380}
381
382sp<AaptFile> StringPool::createStringBlock()
383{
384    sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
385                                     String8());
386    status_t err = writeStringBlock(pool);
387    return err == NO_ERROR ? pool : NULL;
388}
389
390#define ENCODE_LENGTH(str, chrsz, strSize) \
391{ \
392    size_t maxMask = 1 << ((chrsz*8)-1); \
393    size_t maxSize = maxMask-1; \
394    if (strSize > maxSize) { \
395        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
396    } \
397    *str++ = strSize; \
398}
399
400status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
401{
402    // Allow appending.  Sorry this is a little wacky.
403    if (pool->getSize() > 0) {
404        sp<AaptFile> block = createStringBlock();
405        if (block == NULL) {
406            return UNKNOWN_ERROR;
407        }
408        ssize_t res = pool->writeData(block->getData(), block->getSize());
409        return (res >= 0) ? (status_t)NO_ERROR : res;
410    }
411
412    // First we need to add all style span names to the string pool.
413    // We do this now (instead of when the span is added) so that these
414    // will appear at the end of the pool, not disrupting the order
415    // our client placed their own strings in it.
416
417    const size_t STYLES = mEntryStyleArray.size();
418    size_t i;
419
420    for (i=0; i<STYLES; i++) {
421        entry_style& style = mEntryStyleArray.editItemAt(i);
422        const size_t N = style.spans.size();
423        for (size_t i=0; i<N; i++) {
424            entry_style_span& span = style.spans.editItemAt(i);
425            ssize_t idx = add(span.name, true);
426            if (idx < 0) {
427                fprintf(stderr, "Error adding span for style tag '%s'\n",
428                        String8(span.name).string());
429                return idx;
430            }
431            span.span.name.index = (uint32_t)idx;
432        }
433    }
434
435    const size_t ENTRIES = size();
436
437    // Now build the pool of unique strings.
438
439    const size_t STRINGS = mEntries.size();
440    const size_t preSize = sizeof(ResStringPool_header)
441                         + (sizeof(uint32_t)*ENTRIES)
442                         + (sizeof(uint32_t)*STYLES);
443    if (pool->editData(preSize) == NULL) {
444        fprintf(stderr, "ERROR: Out of memory for string pool\n");
445        return NO_MEMORY;
446    }
447
448    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
449
450    size_t strPos = 0;
451    for (i=0; i<STRINGS; i++) {
452        entry& ent = mEntries.editItemAt(i);
453        const size_t strSize = (ent.value.size());
454        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
455            charSize*2 : charSize;
456
457        String8 encStr;
458        if (mUTF8) {
459            encStr = String8(ent.value);
460        }
461
462        const size_t encSize = mUTF8 ? encStr.size() : 0;
463        const size_t encLenSize = mUTF8 ?
464            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
465                charSize*2 : charSize) : 0;
466
467        ent.offset = strPos;
468
469        const size_t totalSize = lenSize + encLenSize +
470            ((mUTF8 ? encSize : strSize)+1)*charSize;
471
472        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
473        if (dat == NULL) {
474            fprintf(stderr, "ERROR: Out of memory for string pool\n");
475            return NO_MEMORY;
476        }
477        dat = (uint8_t*)dat + preSize + strPos;
478        if (mUTF8) {
479            uint8_t* strings = (uint8_t*)dat;
480
481            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
482
483            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
484
485            strncpy((char*)strings, encStr, encSize+1);
486        } else {
487            uint16_t* strings = (uint16_t*)dat;
488
489            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
490
491            strcpy16_htod(strings, ent.value);
492        }
493
494        strPos += totalSize;
495    }
496
497    // Pad ending string position up to a uint32_t boundary.
498
499    if (strPos&0x3) {
500        size_t padPos = ((strPos+3)&~0x3);
501        uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
502        if (dat == NULL) {
503            fprintf(stderr, "ERROR: Out of memory padding string pool\n");
504            return NO_MEMORY;
505        }
506        memset(dat+preSize+strPos, 0, padPos-strPos);
507        strPos = padPos;
508    }
509
510    // Build the pool of style spans.
511
512    size_t styPos = strPos;
513    for (i=0; i<STYLES; i++) {
514        entry_style& ent = mEntryStyleArray.editItemAt(i);
515        const size_t N = ent.spans.size();
516        const size_t totalSize = (N*sizeof(ResStringPool_span))
517                               + sizeof(ResStringPool_ref);
518
519        ent.offset = styPos-strPos;
520        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
521        if (dat == NULL) {
522            fprintf(stderr, "ERROR: Out of memory for string styles\n");
523            return NO_MEMORY;
524        }
525        ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
526        for (size_t i=0; i<N; i++) {
527            span->name.index = htodl(ent.spans[i].span.name.index);
528            span->firstChar = htodl(ent.spans[i].span.firstChar);
529            span->lastChar = htodl(ent.spans[i].span.lastChar);
530            span++;
531        }
532        span->name.index = htodl(ResStringPool_span::END);
533
534        styPos += totalSize;
535    }
536
537    if (STYLES > 0) {
538        // Add full terminator at the end (when reading we validate that
539        // the end of the pool is fully terminated to simplify error
540        // checking).
541        size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
542        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
543        if (dat == NULL) {
544            fprintf(stderr, "ERROR: Out of memory for string styles\n");
545            return NO_MEMORY;
546        }
547        uint32_t* p = (uint32_t*)(dat+preSize+styPos);
548        while (extra > 0) {
549            *p++ = htodl(ResStringPool_span::END);
550            extra -= sizeof(uint32_t);
551        }
552        styPos += extra;
553    }
554
555    // Write header.
556
557    ResStringPool_header* header =
558        (ResStringPool_header*)pool->padData(sizeof(uint32_t));
559    if (header == NULL) {
560        fprintf(stderr, "ERROR: Out of memory for string pool\n");
561        return NO_MEMORY;
562    }
563    memset(header, 0, sizeof(*header));
564    header->header.type = htods(RES_STRING_POOL_TYPE);
565    header->header.headerSize = htods(sizeof(*header));
566    header->header.size = htodl(pool->getSize());
567    header->stringCount = htodl(ENTRIES);
568    header->styleCount = htodl(STYLES);
569    if (mSorted) {
570        header->flags |= htodl(ResStringPool_header::SORTED_FLAG);
571    }
572    if (mUTF8) {
573        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
574    }
575    header->stringsStart = htodl(preSize);
576    header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
577
578    // Write string index array.
579
580    uint32_t* index = (uint32_t*)(header+1);
581    if (mSorted) {
582        for (i=0; i<ENTRIES; i++) {
583            entry& ent = const_cast<entry&>(entryAt(i));
584            ent.indices.clear();
585            ent.indices.add(i);
586            *index++ = htodl(ent.offset);
587        }
588    } else {
589        for (i=0; i<ENTRIES; i++) {
590            entry& ent = mEntries.editItemAt(mEntryArray[i]);
591            *index++ = htodl(ent.offset);
592            NOISY(printf("Writing entry #%d: \"%s\" ent=%d off=%d\n", i,
593                    String8(ent.value).string(),
594                    mEntryArray[i], ent.offset));
595        }
596    }
597
598    // Write style index array.
599
600    if (mSorted) {
601        for (i=0; i<STYLES; i++) {
602            LOG_ALWAYS_FATAL("Shouldn't be here!");
603        }
604    } else {
605        for (i=0; i<STYLES; i++) {
606            *index++ = htodl(mEntryStyleArray[i].offset);
607        }
608    }
609
610    return NO_ERROR;
611}
612
613ssize_t StringPool::offsetForString(const String16& val) const
614{
615    const Vector<size_t>* indices = offsetsForString(val);
616    ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
617    NOISY(printf("Offset for string %s: %d (%s)\n", String8(val).string(), res,
618            res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8()));
619    return res;
620}
621
622const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
623{
624    ssize_t pos = mValues.valueFor(val);
625    if (pos < 0) {
626        return NULL;
627    }
628    return &mEntries[mEntryArray[pos]].indices;
629}
630