StringPool.cpp revision 61361f376b47d45966b1ca0d24d51622304c93c3
1//
2// Copyright 2006 The Android Open Source Project
3//
4// Build resource files from raw assets.
5//
6
7#include "StringPool.h"
8#include "ResourceTable.h"
9
10#include <utils/ByteOrder.h>
11#include <utils/SortedVector.h>
12
13#if HAVE_PRINTF_ZD
14#  define ZD "%zd"
15#  define ZD_TYPE ssize_t
16#else
17#  define ZD "%ld"
18#  define ZD_TYPE long
19#endif
20
21#define NOISY(x) //x
22
23void strcpy16_htod(uint16_t* dst, const uint16_t* src)
24{
25    while (*src) {
26        char16_t s = htods(*src);
27        *dst++ = s;
28        src++;
29    }
30    *dst = 0;
31}
32
33void printStringPool(const ResStringPool* pool)
34{
35    SortedVector<const void*> uniqueStrings;
36    const size_t N = pool->size();
37    for (size_t i=0; i<N; i++) {
38        size_t len;
39        if (pool->isUTF8()) {
40            uniqueStrings.add(pool->string8At(i, &len));
41        } else {
42            uniqueStrings.add(pool->stringAt(i, &len));
43        }
44    }
45
46    printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
47            ZD " styles using " ZD " bytes:\n",
48            (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
49            pool->isSorted() ? "sorted" : "non-sorted",
50            (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
51
52    const size_t NS = pool->size();
53    for (size_t s=0; s<NS; s++) {
54        String8 str = pool->string8ObjectAt(s);
55        printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
56    }
57}
58
59String8 StringPool::entry::makeConfigsString() const {
60    String8 configStr(configTypeName);
61    if (configStr.size() > 0) configStr.append(" ");
62    if (configs.size() > 0) {
63        for (size_t j=0; j<configs.size(); j++) {
64            if (j > 0) configStr.append(", ");
65            configStr.append(configs[j].toString());
66        }
67    } else {
68        configStr = "(none)";
69    }
70    return configStr;
71}
72
73int StringPool::entry::compare(const entry& o) const {
74    // Strings with styles go first, to reduce the size of the styles array.
75    // We don't care about the relative order of these strings.
76    if (hasStyles) {
77        return o.hasStyles ? 0 : -1;
78    }
79    if (o.hasStyles) {
80        return 1;
81    }
82
83    // Sort unstyled strings by type, then by logical configuration.
84    int comp = configTypeName.compare(o.configTypeName);
85    if (comp != 0) {
86        return comp;
87    }
88    const size_t LHN = configs.size();
89    const size_t RHN = o.configs.size();
90    size_t i=0;
91    while (i < LHN && i < RHN) {
92        comp = configs[i].compareLogical(o.configs[i]);
93        if (comp != 0) {
94            return comp;
95        }
96        i++;
97    }
98    if (LHN < RHN) return -1;
99    else if (LHN > RHN) return 1;
100    return 0;
101}
102
103StringPool::StringPool(bool utf8) :
104        mUTF8(utf8), mValues(-1)
105{
106}
107
108ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
109        const String8* configTypeName, const ResTable_config* config)
110{
111    ssize_t res = add(value, false, configTypeName, config);
112    if (res >= 0) {
113        addStyleSpans(res, spans);
114    }
115    return res;
116}
117
118ssize_t StringPool::add(const String16& value,
119        bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
120{
121    ssize_t vidx = mValues.indexOfKey(value);
122    ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
123    ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
124    if (eidx < 0) {
125        eidx = mEntries.add(entry(value));
126        if (eidx < 0) {
127            fprintf(stderr, "Failure adding string %s\n", String8(value).string());
128            return eidx;
129        }
130    }
131
132    if (configTypeName != NULL) {
133        entry& ent = mEntries.editItemAt(eidx);
134        NOISY(printf("*** adding config type name %s, was %s\n",
135                configTypeName->string(), ent.configTypeName.string()));
136        if (ent.configTypeName.size() <= 0) {
137            ent.configTypeName = *configTypeName;
138        } else if (ent.configTypeName != *configTypeName) {
139            ent.configTypeName = " ";
140        }
141    }
142
143    if (config != NULL) {
144        // Add this to the set of configs associated with the string.
145        entry& ent = mEntries.editItemAt(eidx);
146        size_t addPos;
147        for (addPos=0; addPos<ent.configs.size(); addPos++) {
148            int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
149            if (cmp >= 0) {
150                if (cmp > 0) {
151                    NOISY(printf("*** inserting config: %s\n", config->toString().string()));
152                    ent.configs.insertAt(*config, addPos);
153                }
154                break;
155            }
156        }
157        if (addPos >= ent.configs.size()) {
158            NOISY(printf("*** adding config: %s\n", config->toString().string()));
159            ent.configs.add(*config);
160        }
161    }
162
163    const bool first = vidx < 0;
164    const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
165        mEntryStyleArray[pos].spans.size() : 0;
166    if (first || styled || !mergeDuplicates) {
167        pos = mEntryArray.add(eidx);
168        if (first) {
169            vidx = mValues.add(value, pos);
170        }
171        entry& ent = mEntries.editItemAt(eidx);
172        ent.indices.add(pos);
173    }
174
175    NOISY(printf("Adding string %s to pool: pos=%d eidx=%d vidx=%d\n",
176            String8(value).string(), pos, eidx, vidx));
177
178    return pos;
179}
180
181status_t StringPool::addStyleSpan(size_t idx, const String16& name,
182                                  uint32_t start, uint32_t end)
183{
184    entry_style_span span;
185    span.name = name;
186    span.span.firstChar = start;
187    span.span.lastChar = end;
188    return addStyleSpan(idx, span);
189}
190
191status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
192{
193    const size_t N=spans.size();
194    for (size_t i=0; i<N; i++) {
195        status_t err = addStyleSpan(idx, spans[i]);
196        if (err != NO_ERROR) {
197            return err;
198        }
199    }
200    return NO_ERROR;
201}
202
203status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
204{
205    // Place blank entries in the span array up to this index.
206    while (mEntryStyleArray.size() <= idx) {
207        mEntryStyleArray.add();
208    }
209
210    entry_style& style = mEntryStyleArray.editItemAt(idx);
211    style.spans.add(span);
212    mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
213    return NO_ERROR;
214}
215
216int StringPool::config_sort(const size_t* lhs, const size_t* rhs, void* state)
217{
218    StringPool* pool = (StringPool*)state;
219    const entry& lhe = pool->mEntries[pool->mEntryArray[*lhs]];
220    const entry& rhe = pool->mEntries[pool->mEntryArray[*rhs]];
221    return lhe.compare(rhe);
222}
223
224void StringPool::sortByConfig()
225{
226    LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
227
228    const size_t N = mEntryArray.size();
229
230    // This is a vector that starts out with a 1:1 mapping to entries
231    // in the array, which we will sort to come up with the desired order.
232    // At that point it maps from the new position in the array to the
233    // original position the entry appeared.
234    Vector<size_t> newPosToOriginalPos;
235    for (size_t i=0; i<mEntryArray.size(); i++) {
236        newPosToOriginalPos.add(i);
237    }
238
239    // Sort the array.
240    NOISY(printf("SORTING STRINGS BY CONFIGURATION...\n"));
241    newPosToOriginalPos.sort(config_sort, this);
242    NOISY(printf("DONE SORTING STRINGS BY CONFIGURATION.\n"));
243
244    // Create the reverse mapping from the original position in the array
245    // to the new position where it appears in the sorted array.  This is
246    // so that clients can re-map any positions they had previously stored.
247    mOriginalPosToNewPos = newPosToOriginalPos;
248    for (size_t i=0; i<N; i++) {
249        mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
250    }
251
252#if 0
253    SortedVector<entry> entries;
254
255    for (size_t i=0; i<N; i++) {
256        printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
257                mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
258        entries.add(mEntries[mEntryArray[i]]);
259    }
260
261    for (size_t i=0; i<entries.size(); i++) {
262        printf("Sorted config #%d: %s\n", i,
263                entries[i].makeConfigsString().string());
264    }
265#endif
266
267    // Now we rebuild the arrays.
268    Vector<entry> newEntries;
269    Vector<size_t> newEntryArray;
270    Vector<entry_style> newEntryStyleArray;
271    DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
272
273    for (size_t i=0; i<N; i++) {
274        // We are filling in new offset 'i'; oldI is where we can find it
275        // in the original data structure.
276        size_t oldI = newPosToOriginalPos[i];
277        // This is the actual entry associated with the old offset.
278        const entry& oldEnt = mEntries[mEntryArray[oldI]];
279        // This is the same entry the last time we added it to the
280        // new entry array, if any.
281        ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
282        size_t newOffset;
283        if (newIndexOfOffset < 0) {
284            // This is the first time we have seen the entry, so add
285            // it.
286            newOffset = newEntries.add(oldEnt);
287            newEntries.editItemAt(newOffset).indices.clear();
288        } else {
289            // We have seen this entry before, use the existing one
290            // instead of adding it again.
291            newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
292        }
293        // Update the indices to include this new position.
294        newEntries.editItemAt(newOffset).indices.add(i);
295        // And add the offset of the entry to the new entry array.
296        newEntryArray.add(newOffset);
297        // Add any old style to the new style array.
298        if (mEntryStyleArray.size() > 0) {
299            if (oldI < mEntryStyleArray.size()) {
300                newEntryStyleArray.add(mEntryStyleArray[oldI]);
301            } else {
302                newEntryStyleArray.add(entry_style());
303            }
304        }
305    }
306
307    // Now trim any entries at the end of the new style array that are
308    // not needed.
309    for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
310        const entry_style& style = newEntryStyleArray[i];
311        if (style.spans.size() > 0) {
312            // That's it.
313            break;
314        }
315        // This one is not needed; remove.
316        newEntryStyleArray.removeAt(i);
317    }
318
319    // All done, install the new data structures and upate mValues with
320    // the new positions.
321    mEntries = newEntries;
322    mEntryArray = newEntryArray;
323    mEntryStyleArray = newEntryStyleArray;
324    mValues.clear();
325    for (size_t i=0; i<mEntries.size(); i++) {
326        const entry& ent = mEntries[i];
327        mValues.add(ent.value, ent.indices[0]);
328    }
329
330#if 0
331    printf("FINAL SORTED STRING CONFIGS:\n");
332    for (size_t i=0; i<mEntries.size(); i++) {
333        const entry& ent = mEntries[i];
334        printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
335                String8(ent.value).string());
336    }
337#endif
338}
339
340sp<AaptFile> StringPool::createStringBlock()
341{
342    sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
343                                     String8());
344    status_t err = writeStringBlock(pool);
345    return err == NO_ERROR ? pool : NULL;
346}
347
348#define ENCODE_LENGTH(str, chrsz, strSize) \
349{ \
350    size_t maxMask = 1 << ((chrsz*8)-1); \
351    size_t maxSize = maxMask-1; \
352    if (strSize > maxSize) { \
353        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
354    } \
355    *str++ = strSize; \
356}
357
358status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
359{
360    // Allow appending.  Sorry this is a little wacky.
361    if (pool->getSize() > 0) {
362        sp<AaptFile> block = createStringBlock();
363        if (block == NULL) {
364            return UNKNOWN_ERROR;
365        }
366        ssize_t res = pool->writeData(block->getData(), block->getSize());
367        return (res >= 0) ? (status_t)NO_ERROR : res;
368    }
369
370    // First we need to add all style span names to the string pool.
371    // We do this now (instead of when the span is added) so that these
372    // will appear at the end of the pool, not disrupting the order
373    // our client placed their own strings in it.
374
375    const size_t STYLES = mEntryStyleArray.size();
376    size_t i;
377
378    for (i=0; i<STYLES; i++) {
379        entry_style& style = mEntryStyleArray.editItemAt(i);
380        const size_t N = style.spans.size();
381        for (size_t i=0; i<N; i++) {
382            entry_style_span& span = style.spans.editItemAt(i);
383            ssize_t idx = add(span.name, true);
384            if (idx < 0) {
385                fprintf(stderr, "Error adding span for style tag '%s'\n",
386                        String8(span.name).string());
387                return idx;
388            }
389            span.span.name.index = (uint32_t)idx;
390        }
391    }
392
393    const size_t ENTRIES = mEntryArray.size();
394
395    // Now build the pool of unique strings.
396
397    const size_t STRINGS = mEntries.size();
398    const size_t preSize = sizeof(ResStringPool_header)
399                         + (sizeof(uint32_t)*ENTRIES)
400                         + (sizeof(uint32_t)*STYLES);
401    if (pool->editData(preSize) == NULL) {
402        fprintf(stderr, "ERROR: Out of memory for string pool\n");
403        return NO_MEMORY;
404    }
405
406    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
407
408    size_t strPos = 0;
409    for (i=0; i<STRINGS; i++) {
410        entry& ent = mEntries.editItemAt(i);
411        const size_t strSize = (ent.value.size());
412        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
413            charSize*2 : charSize;
414
415        String8 encStr;
416        if (mUTF8) {
417            encStr = String8(ent.value);
418        }
419
420        const size_t encSize = mUTF8 ? encStr.size() : 0;
421        const size_t encLenSize = mUTF8 ?
422            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
423                charSize*2 : charSize) : 0;
424
425        ent.offset = strPos;
426
427        const size_t totalSize = lenSize + encLenSize +
428            ((mUTF8 ? encSize : strSize)+1)*charSize;
429
430        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
431        if (dat == NULL) {
432            fprintf(stderr, "ERROR: Out of memory for string pool\n");
433            return NO_MEMORY;
434        }
435        dat = (uint8_t*)dat + preSize + strPos;
436        if (mUTF8) {
437            uint8_t* strings = (uint8_t*)dat;
438
439            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
440
441            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
442
443            strncpy((char*)strings, encStr, encSize+1);
444        } else {
445            uint16_t* strings = (uint16_t*)dat;
446
447            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
448
449            strcpy16_htod(strings, ent.value);
450        }
451
452        strPos += totalSize;
453    }
454
455    // Pad ending string position up to a uint32_t boundary.
456
457    if (strPos&0x3) {
458        size_t padPos = ((strPos+3)&~0x3);
459        uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
460        if (dat == NULL) {
461            fprintf(stderr, "ERROR: Out of memory padding string pool\n");
462            return NO_MEMORY;
463        }
464        memset(dat+preSize+strPos, 0, padPos-strPos);
465        strPos = padPos;
466    }
467
468    // Build the pool of style spans.
469
470    size_t styPos = strPos;
471    for (i=0; i<STYLES; i++) {
472        entry_style& ent = mEntryStyleArray.editItemAt(i);
473        const size_t N = ent.spans.size();
474        const size_t totalSize = (N*sizeof(ResStringPool_span))
475                               + sizeof(ResStringPool_ref);
476
477        ent.offset = styPos-strPos;
478        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
479        if (dat == NULL) {
480            fprintf(stderr, "ERROR: Out of memory for string styles\n");
481            return NO_MEMORY;
482        }
483        ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
484        for (size_t i=0; i<N; i++) {
485            span->name.index = htodl(ent.spans[i].span.name.index);
486            span->firstChar = htodl(ent.spans[i].span.firstChar);
487            span->lastChar = htodl(ent.spans[i].span.lastChar);
488            span++;
489        }
490        span->name.index = htodl(ResStringPool_span::END);
491
492        styPos += totalSize;
493    }
494
495    if (STYLES > 0) {
496        // Add full terminator at the end (when reading we validate that
497        // the end of the pool is fully terminated to simplify error
498        // checking).
499        size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
500        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
501        if (dat == NULL) {
502            fprintf(stderr, "ERROR: Out of memory for string styles\n");
503            return NO_MEMORY;
504        }
505        uint32_t* p = (uint32_t*)(dat+preSize+styPos);
506        while (extra > 0) {
507            *p++ = htodl(ResStringPool_span::END);
508            extra -= sizeof(uint32_t);
509        }
510        styPos += extra;
511    }
512
513    // Write header.
514
515    ResStringPool_header* header =
516        (ResStringPool_header*)pool->padData(sizeof(uint32_t));
517    if (header == NULL) {
518        fprintf(stderr, "ERROR: Out of memory for string pool\n");
519        return NO_MEMORY;
520    }
521    memset(header, 0, sizeof(*header));
522    header->header.type = htods(RES_STRING_POOL_TYPE);
523    header->header.headerSize = htods(sizeof(*header));
524    header->header.size = htodl(pool->getSize());
525    header->stringCount = htodl(ENTRIES);
526    header->styleCount = htodl(STYLES);
527    if (mUTF8) {
528        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
529    }
530    header->stringsStart = htodl(preSize);
531    header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
532
533    // Write string index array.
534
535    uint32_t* index = (uint32_t*)(header+1);
536    for (i=0; i<ENTRIES; i++) {
537        entry& ent = mEntries.editItemAt(mEntryArray[i]);
538        *index++ = htodl(ent.offset);
539        NOISY(printf("Writing entry #%d: \"%s\" ent=%d off=%d\n", i,
540                String8(ent.value).string(),
541                mEntryArray[i], ent.offset));
542    }
543
544    // Write style index array.
545
546    for (i=0; i<STYLES; i++) {
547        *index++ = htodl(mEntryStyleArray[i].offset);
548    }
549
550    return NO_ERROR;
551}
552
553ssize_t StringPool::offsetForString(const String16& val) const
554{
555    const Vector<size_t>* indices = offsetsForString(val);
556    ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
557    NOISY(printf("Offset for string %s: %d (%s)\n", String8(val).string(), res,
558            res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8()));
559    return res;
560}
561
562const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
563{
564    ssize_t pos = mValues.valueFor(val);
565    if (pos < 0) {
566        return NULL;
567    }
568    return &mEntries[mEntryArray[pos]].indices;
569}
570