StringPool.cpp revision fe75d62eba3935d2d1a4a7790fc7459ded241ad3
1//
2// Copyright 2006 The Android Open Source Project
3//
4// Build resource files from raw assets.
5//
6
7#include "StringPool.h"
8#include "ResourceTable.h"
9
10#include <utils/ByteOrder.h>
11#include <utils/SortedVector.h>
12#include <cutils/qsort_r_compat.h>
13
14#if HAVE_PRINTF_ZD
15#  define ZD "%zd"
16#  define ZD_TYPE ssize_t
17#else
18#  define ZD "%ld"
19#  define ZD_TYPE long
20#endif
21
22#define NOISY(x) //x
23
24void strcpy16_htod(uint16_t* dst, const uint16_t* src)
25{
26    while (*src) {
27        char16_t s = htods(*src);
28        *dst++ = s;
29        src++;
30    }
31    *dst = 0;
32}
33
34void printStringPool(const ResStringPool* pool)
35{
36    SortedVector<const void*> uniqueStrings;
37    const size_t N = pool->size();
38    for (size_t i=0; i<N; i++) {
39        size_t len;
40        if (pool->isUTF8()) {
41            uniqueStrings.add(pool->string8At(i, &len));
42        } else {
43            uniqueStrings.add(pool->stringAt(i, &len));
44        }
45    }
46
47    printf("String pool of " ZD " unique %s %s strings, " ZD " entries and "
48            ZD " styles using " ZD " bytes:\n",
49            (ZD_TYPE)uniqueStrings.size(), pool->isUTF8() ? "UTF-8" : "UTF-16",
50            pool->isSorted() ? "sorted" : "non-sorted",
51            (ZD_TYPE)N, (ZD_TYPE)pool->styleCount(), (ZD_TYPE)pool->bytes());
52
53    const size_t NS = pool->size();
54    for (size_t s=0; s<NS; s++) {
55        String8 str = pool->string8ObjectAt(s);
56        printf("String #" ZD ": %s\n", (ZD_TYPE) s, str.string());
57    }
58}
59
60String8 StringPool::entry::makeConfigsString() const {
61    String8 configStr(configTypeName);
62    if (configStr.size() > 0) configStr.append(" ");
63    if (configs.size() > 0) {
64        for (size_t j=0; j<configs.size(); j++) {
65            if (j > 0) configStr.append(", ");
66            configStr.append(configs[j].toString());
67        }
68    } else {
69        configStr = "(none)";
70    }
71    return configStr;
72}
73
74int StringPool::entry::compare(const entry& o) const {
75    // Strings with styles go first, to reduce the size of the styles array.
76    // We don't care about the relative order of these strings.
77    if (hasStyles) {
78        return o.hasStyles ? 0 : -1;
79    }
80    if (o.hasStyles) {
81        return 1;
82    }
83
84    // Sort unstyled strings by type, then by logical configuration.
85    int comp = configTypeName.compare(o.configTypeName);
86    if (comp != 0) {
87        return comp;
88    }
89    const size_t LHN = configs.size();
90    const size_t RHN = o.configs.size();
91    size_t i=0;
92    while (i < LHN && i < RHN) {
93        comp = configs[i].compareLogical(o.configs[i]);
94        if (comp != 0) {
95            return comp;
96        }
97        i++;
98    }
99    if (LHN < RHN) return -1;
100    else if (LHN > RHN) return 1;
101    return 0;
102}
103
104StringPool::StringPool(bool utf8) :
105        mUTF8(utf8), mValues(-1)
106{
107}
108
109ssize_t StringPool::add(const String16& value, const Vector<entry_style_span>& spans,
110        const String8* configTypeName, const ResTable_config* config)
111{
112    ssize_t res = add(value, false, configTypeName, config);
113    if (res >= 0) {
114        addStyleSpans(res, spans);
115    }
116    return res;
117}
118
119ssize_t StringPool::add(const String16& value,
120        bool mergeDuplicates, const String8* configTypeName, const ResTable_config* config)
121{
122    ssize_t vidx = mValues.indexOfKey(value);
123    ssize_t pos = vidx >= 0 ? mValues.valueAt(vidx) : -1;
124    ssize_t eidx = pos >= 0 ? mEntryArray.itemAt(pos) : -1;
125    if (eidx < 0) {
126        eidx = mEntries.add(entry(value));
127        if (eidx < 0) {
128            fprintf(stderr, "Failure adding string %s\n", String8(value).string());
129            return eidx;
130        }
131    }
132
133    if (configTypeName != NULL) {
134        entry& ent = mEntries.editItemAt(eidx);
135        NOISY(printf("*** adding config type name %s, was %s\n",
136                configTypeName->string(), ent.configTypeName.string()));
137        if (ent.configTypeName.size() <= 0) {
138            ent.configTypeName = *configTypeName;
139        } else if (ent.configTypeName != *configTypeName) {
140            ent.configTypeName = " ";
141        }
142    }
143
144    if (config != NULL) {
145        // Add this to the set of configs associated with the string.
146        entry& ent = mEntries.editItemAt(eidx);
147        size_t addPos;
148        for (addPos=0; addPos<ent.configs.size(); addPos++) {
149            int cmp = ent.configs.itemAt(addPos).compareLogical(*config);
150            if (cmp >= 0) {
151                if (cmp > 0) {
152                    NOISY(printf("*** inserting config: %s\n", config->toString().string()));
153                    ent.configs.insertAt(*config, addPos);
154                }
155                break;
156            }
157        }
158        if (addPos >= ent.configs.size()) {
159            NOISY(printf("*** adding config: %s\n", config->toString().string()));
160            ent.configs.add(*config);
161        }
162    }
163
164    const bool first = vidx < 0;
165    const bool styled = (pos >= 0 && (size_t)pos < mEntryStyleArray.size()) ?
166        mEntryStyleArray[pos].spans.size() : 0;
167    if (first || styled || !mergeDuplicates) {
168        pos = mEntryArray.add(eidx);
169        if (first) {
170            vidx = mValues.add(value, pos);
171        }
172        entry& ent = mEntries.editItemAt(eidx);
173        ent.indices.add(pos);
174    }
175
176    NOISY(printf("Adding string %s to pool: pos=%d eidx=%d vidx=%d\n",
177            String8(value).string(), pos, eidx, vidx));
178
179    return pos;
180}
181
182status_t StringPool::addStyleSpan(size_t idx, const String16& name,
183                                  uint32_t start, uint32_t end)
184{
185    entry_style_span span;
186    span.name = name;
187    span.span.firstChar = start;
188    span.span.lastChar = end;
189    return addStyleSpan(idx, span);
190}
191
192status_t StringPool::addStyleSpans(size_t idx, const Vector<entry_style_span>& spans)
193{
194    const size_t N=spans.size();
195    for (size_t i=0; i<N; i++) {
196        status_t err = addStyleSpan(idx, spans[i]);
197        if (err != NO_ERROR) {
198            return err;
199        }
200    }
201    return NO_ERROR;
202}
203
204status_t StringPool::addStyleSpan(size_t idx, const entry_style_span& span)
205{
206    // Place blank entries in the span array up to this index.
207    while (mEntryStyleArray.size() <= idx) {
208        mEntryStyleArray.add();
209    }
210
211    entry_style& style = mEntryStyleArray.editItemAt(idx);
212    style.spans.add(span);
213    mEntries.editItemAt(mEntryArray[idx]).hasStyles = true;
214    return NO_ERROR;
215}
216
217int StringPool::config_sort(void* state, const void* lhs, const void* rhs)
218{
219    StringPool* pool = (StringPool*)state;
220    const entry& lhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(lhs)]];
221    const entry& rhe = pool->mEntries[pool->mEntryArray[*static_cast<const size_t*>(rhs)]];
222    return lhe.compare(rhe);
223}
224
225void StringPool::sortByConfig()
226{
227    LOG_ALWAYS_FATAL_IF(mOriginalPosToNewPos.size() > 0, "Can't sort string pool after already sorted.");
228
229    const size_t N = mEntryArray.size();
230
231    // This is a vector that starts out with a 1:1 mapping to entries
232    // in the array, which we will sort to come up with the desired order.
233    // At that point it maps from the new position in the array to the
234    // original position the entry appeared.
235    Vector<size_t> newPosToOriginalPos;
236    newPosToOriginalPos.setCapacity(N);
237    for (size_t i=0; i < N; i++) {
238        newPosToOriginalPos.add(i);
239    }
240
241    // Sort the array.
242    NOISY(printf("SORTING STRINGS BY CONFIGURATION...\n"));
243    // Vector::sort uses insertion sort, which is very slow for this data set.
244    // Use quicksort instead because we don't need a stable sort here.
245    qsort_r_compat(newPosToOriginalPos.editArray(), N, sizeof(size_t), this, config_sort);
246    //newPosToOriginalPos.sort(config_sort, this);
247    NOISY(printf("DONE SORTING STRINGS BY CONFIGURATION.\n"));
248
249    // Create the reverse mapping from the original position in the array
250    // to the new position where it appears in the sorted array.  This is
251    // so that clients can re-map any positions they had previously stored.
252    mOriginalPosToNewPos = newPosToOriginalPos;
253    for (size_t i=0; i<N; i++) {
254        mOriginalPosToNewPos.editItemAt(newPosToOriginalPos[i]) = i;
255    }
256
257#if 0
258    SortedVector<entry> entries;
259
260    for (size_t i=0; i<N; i++) {
261        printf("#%d was %d: %s\n", i, newPosToOriginalPos[i],
262                mEntries[mEntryArray[newPosToOriginalPos[i]]].makeConfigsString().string());
263        entries.add(mEntries[mEntryArray[i]]);
264    }
265
266    for (size_t i=0; i<entries.size(); i++) {
267        printf("Sorted config #%d: %s\n", i,
268                entries[i].makeConfigsString().string());
269    }
270#endif
271
272    // Now we rebuild the arrays.
273    Vector<entry> newEntries;
274    Vector<size_t> newEntryArray;
275    Vector<entry_style> newEntryStyleArray;
276    DefaultKeyedVector<size_t, size_t> origOffsetToNewOffset;
277
278    for (size_t i=0; i<N; i++) {
279        // We are filling in new offset 'i'; oldI is where we can find it
280        // in the original data structure.
281        size_t oldI = newPosToOriginalPos[i];
282        // This is the actual entry associated with the old offset.
283        const entry& oldEnt = mEntries[mEntryArray[oldI]];
284        // This is the same entry the last time we added it to the
285        // new entry array, if any.
286        ssize_t newIndexOfOffset = origOffsetToNewOffset.indexOfKey(oldI);
287        size_t newOffset;
288        if (newIndexOfOffset < 0) {
289            // This is the first time we have seen the entry, so add
290            // it.
291            newOffset = newEntries.add(oldEnt);
292            newEntries.editItemAt(newOffset).indices.clear();
293        } else {
294            // We have seen this entry before, use the existing one
295            // instead of adding it again.
296            newOffset = origOffsetToNewOffset.valueAt(newIndexOfOffset);
297        }
298        // Update the indices to include this new position.
299        newEntries.editItemAt(newOffset).indices.add(i);
300        // And add the offset of the entry to the new entry array.
301        newEntryArray.add(newOffset);
302        // Add any old style to the new style array.
303        if (mEntryStyleArray.size() > 0) {
304            if (oldI < mEntryStyleArray.size()) {
305                newEntryStyleArray.add(mEntryStyleArray[oldI]);
306            } else {
307                newEntryStyleArray.add(entry_style());
308            }
309        }
310    }
311
312    // Now trim any entries at the end of the new style array that are
313    // not needed.
314    for (ssize_t i=newEntryStyleArray.size()-1; i>=0; i--) {
315        const entry_style& style = newEntryStyleArray[i];
316        if (style.spans.size() > 0) {
317            // That's it.
318            break;
319        }
320        // This one is not needed; remove.
321        newEntryStyleArray.removeAt(i);
322    }
323
324    // All done, install the new data structures and upate mValues with
325    // the new positions.
326    mEntries = newEntries;
327    mEntryArray = newEntryArray;
328    mEntryStyleArray = newEntryStyleArray;
329    mValues.clear();
330    for (size_t i=0; i<mEntries.size(); i++) {
331        const entry& ent = mEntries[i];
332        mValues.add(ent.value, ent.indices[0]);
333    }
334
335#if 0
336    printf("FINAL SORTED STRING CONFIGS:\n");
337    for (size_t i=0; i<mEntries.size(); i++) {
338        const entry& ent = mEntries[i];
339        printf("#" ZD " %s: %s\n", (ZD_TYPE)i, ent.makeConfigsString().string(),
340                String8(ent.value).string());
341    }
342#endif
343}
344
345sp<AaptFile> StringPool::createStringBlock()
346{
347    sp<AaptFile> pool = new AaptFile(String8(), AaptGroupEntry(),
348                                     String8());
349    status_t err = writeStringBlock(pool);
350    return err == NO_ERROR ? pool : NULL;
351}
352
353#define ENCODE_LENGTH(str, chrsz, strSize) \
354{ \
355    size_t maxMask = 1 << ((chrsz*8)-1); \
356    size_t maxSize = maxMask-1; \
357    if (strSize > maxSize) { \
358        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
359    } \
360    *str++ = strSize; \
361}
362
363status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
364{
365    // Allow appending.  Sorry this is a little wacky.
366    if (pool->getSize() > 0) {
367        sp<AaptFile> block = createStringBlock();
368        if (block == NULL) {
369            return UNKNOWN_ERROR;
370        }
371        ssize_t res = pool->writeData(block->getData(), block->getSize());
372        return (res >= 0) ? (status_t)NO_ERROR : res;
373    }
374
375    // First we need to add all style span names to the string pool.
376    // We do this now (instead of when the span is added) so that these
377    // will appear at the end of the pool, not disrupting the order
378    // our client placed their own strings in it.
379
380    const size_t STYLES = mEntryStyleArray.size();
381    size_t i;
382
383    for (i=0; i<STYLES; i++) {
384        entry_style& style = mEntryStyleArray.editItemAt(i);
385        const size_t N = style.spans.size();
386        for (size_t i=0; i<N; i++) {
387            entry_style_span& span = style.spans.editItemAt(i);
388            ssize_t idx = add(span.name, true);
389            if (idx < 0) {
390                fprintf(stderr, "Error adding span for style tag '%s'\n",
391                        String8(span.name).string());
392                return idx;
393            }
394            span.span.name.index = (uint32_t)idx;
395        }
396    }
397
398    const size_t ENTRIES = mEntryArray.size();
399
400    // Now build the pool of unique strings.
401
402    const size_t STRINGS = mEntries.size();
403    const size_t preSize = sizeof(ResStringPool_header)
404                         + (sizeof(uint32_t)*ENTRIES)
405                         + (sizeof(uint32_t)*STYLES);
406    if (pool->editData(preSize) == NULL) {
407        fprintf(stderr, "ERROR: Out of memory for string pool\n");
408        return NO_MEMORY;
409    }
410
411    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
412
413    size_t strPos = 0;
414    for (i=0; i<STRINGS; i++) {
415        entry& ent = mEntries.editItemAt(i);
416        const size_t strSize = (ent.value.size());
417        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
418            charSize*2 : charSize;
419
420        String8 encStr;
421        if (mUTF8) {
422            encStr = String8(ent.value);
423        }
424
425        const size_t encSize = mUTF8 ? encStr.size() : 0;
426        const size_t encLenSize = mUTF8 ?
427            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
428                charSize*2 : charSize) : 0;
429
430        ent.offset = strPos;
431
432        const size_t totalSize = lenSize + encLenSize +
433            ((mUTF8 ? encSize : strSize)+1)*charSize;
434
435        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
436        if (dat == NULL) {
437            fprintf(stderr, "ERROR: Out of memory for string pool\n");
438            return NO_MEMORY;
439        }
440        dat = (uint8_t*)dat + preSize + strPos;
441        if (mUTF8) {
442            uint8_t* strings = (uint8_t*)dat;
443
444            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
445
446            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
447
448            strncpy((char*)strings, encStr, encSize+1);
449        } else {
450            uint16_t* strings = (uint16_t*)dat;
451
452            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
453
454            strcpy16_htod(strings, ent.value);
455        }
456
457        strPos += totalSize;
458    }
459
460    // Pad ending string position up to a uint32_t boundary.
461
462    if (strPos&0x3) {
463        size_t padPos = ((strPos+3)&~0x3);
464        uint8_t* dat = (uint8_t*)pool->editData(preSize + padPos);
465        if (dat == NULL) {
466            fprintf(stderr, "ERROR: Out of memory padding string pool\n");
467            return NO_MEMORY;
468        }
469        memset(dat+preSize+strPos, 0, padPos-strPos);
470        strPos = padPos;
471    }
472
473    // Build the pool of style spans.
474
475    size_t styPos = strPos;
476    for (i=0; i<STYLES; i++) {
477        entry_style& ent = mEntryStyleArray.editItemAt(i);
478        const size_t N = ent.spans.size();
479        const size_t totalSize = (N*sizeof(ResStringPool_span))
480                               + sizeof(ResStringPool_ref);
481
482        ent.offset = styPos-strPos;
483        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + totalSize);
484        if (dat == NULL) {
485            fprintf(stderr, "ERROR: Out of memory for string styles\n");
486            return NO_MEMORY;
487        }
488        ResStringPool_span* span = (ResStringPool_span*)(dat+preSize+styPos);
489        for (size_t i=0; i<N; i++) {
490            span->name.index = htodl(ent.spans[i].span.name.index);
491            span->firstChar = htodl(ent.spans[i].span.firstChar);
492            span->lastChar = htodl(ent.spans[i].span.lastChar);
493            span++;
494        }
495        span->name.index = htodl(ResStringPool_span::END);
496
497        styPos += totalSize;
498    }
499
500    if (STYLES > 0) {
501        // Add full terminator at the end (when reading we validate that
502        // the end of the pool is fully terminated to simplify error
503        // checking).
504        size_t extra = sizeof(ResStringPool_span)-sizeof(ResStringPool_ref);
505        uint8_t* dat = (uint8_t*)pool->editData(preSize + styPos + extra);
506        if (dat == NULL) {
507            fprintf(stderr, "ERROR: Out of memory for string styles\n");
508            return NO_MEMORY;
509        }
510        uint32_t* p = (uint32_t*)(dat+preSize+styPos);
511        while (extra > 0) {
512            *p++ = htodl(ResStringPool_span::END);
513            extra -= sizeof(uint32_t);
514        }
515        styPos += extra;
516    }
517
518    // Write header.
519
520    ResStringPool_header* header =
521        (ResStringPool_header*)pool->padData(sizeof(uint32_t));
522    if (header == NULL) {
523        fprintf(stderr, "ERROR: Out of memory for string pool\n");
524        return NO_MEMORY;
525    }
526    memset(header, 0, sizeof(*header));
527    header->header.type = htods(RES_STRING_POOL_TYPE);
528    header->header.headerSize = htods(sizeof(*header));
529    header->header.size = htodl(pool->getSize());
530    header->stringCount = htodl(ENTRIES);
531    header->styleCount = htodl(STYLES);
532    if (mUTF8) {
533        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
534    }
535    header->stringsStart = htodl(preSize);
536    header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
537
538    // Write string index array.
539
540    uint32_t* index = (uint32_t*)(header+1);
541    for (i=0; i<ENTRIES; i++) {
542        entry& ent = mEntries.editItemAt(mEntryArray[i]);
543        *index++ = htodl(ent.offset);
544        NOISY(printf("Writing entry #%d: \"%s\" ent=%d off=%d\n", i,
545                String8(ent.value).string(),
546                mEntryArray[i], ent.offset));
547    }
548
549    // Write style index array.
550
551    for (i=0; i<STYLES; i++) {
552        *index++ = htodl(mEntryStyleArray[i].offset);
553    }
554
555    return NO_ERROR;
556}
557
558ssize_t StringPool::offsetForString(const String16& val) const
559{
560    const Vector<size_t>* indices = offsetsForString(val);
561    ssize_t res = indices != NULL && indices->size() > 0 ? indices->itemAt(0) : -1;
562    NOISY(printf("Offset for string %s: %d (%s)\n", String8(val).string(), res,
563            res >= 0 ? String8(mEntries[mEntryArray[res]].value).string() : String8()));
564    return res;
565}
566
567const Vector<size_t>* StringPool::offsetsForString(const String16& val) const
568{
569    ssize_t pos = mValues.valueFor(val);
570    if (pos < 0) {
571        return NULL;
572    }
573    return &mEntries[mEntryArray[pos]].indices;
574}
575