String8.cpp revision d24b8183b93e781080b2c16c487e60d51c12da31
1/*
2 * Copyright (C) 2005 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <utils/String8.h>
18
19#include <utils/Log.h>
20#include <utils/String16.h>
21#include <utils/TextOutput.h>
22#include <utils/threads.h>
23
24#include <private/utils/Static.h>
25
26#include <ctype.h>
27
28namespace android {
29
30// ---------------------------------------------------------------------------
31
// Bit patterns for UTF-8 continuation bytes: OR-ing a value with kByteMark
// sets bit 7 and AND-ing with kByteMask clears bit 6, which leaves a byte of
// the form 10xxxxxx.
static const uint32_t kByteMask = 0xBF;
static const uint32_t kByteMark = 0x80;

// Surrogate code units are not valid UTF-32 characters; these bounds let the
// conversion helpers recognise and reject them.
static const uint32_t kUnicodeSurrogateHighStart = 0xD800;
static const uint32_t kUnicodeSurrogateHighEnd   = 0xDBFF;
static const uint32_t kUnicodeSurrogateLowStart  = 0xDC00;
static const uint32_t kUnicodeSurrogateLowEnd    = 0xDFFF;
static const uint32_t kUnicodeSurrogateStart     = kUnicodeSurrogateHighStart;
static const uint32_t kUnicodeSurrogateEnd       = kUnicodeSurrogateLowEnd;

// Lead-byte marker bits for a UTF-8 sequence, indexed by the total number of
// bytes in that sequence (entries 0 and 1 add no marker bits).
static const uint32_t kFirstByteMark[] = {
    0x00,   // 0 bytes
    0x00,   // 1 byte
    0xC0,   // 2 bytes
    0xE0,   // 3 bytes
    0xF0    // 4 bytes
};

// Resource paths always use '/' as their separator, unlike
// OS_PATH_SEPARATOR which depends on the platform.
#define RES_PATH_SEPARATOR '/'
53
54// Return number of utf8 bytes required for the character.
55static size_t utf32_to_utf8_bytes(uint32_t srcChar)
56{
57    size_t bytesToWrite;
58
59    // Figure out how many bytes the result will require.
60    if (srcChar < 0x00000080)
61    {
62        bytesToWrite = 1;
63    }
64    else if (srcChar < 0x00000800)
65    {
66        bytesToWrite = 2;
67    }
68    else if (srcChar < 0x00010000)
69    {
70        if ((srcChar < kUnicodeSurrogateStart)
71         || (srcChar > kUnicodeSurrogateEnd))
72        {
73            bytesToWrite = 3;
74        }
75        else
76        {
77            // Surrogates are invalid UTF-32 characters.
78            return 0;
79        }
80    }
81    // Max code point for Unicode is 0x0010FFFF.
82    else if (srcChar < 0x00110000)
83    {
84        bytesToWrite = 4;
85    }
86    else
87    {
88        // Invalid UTF-32 character.
89        return 0;
90    }
91
92    return bytesToWrite;
93}
94
95// Write out the source character to <dstP>.
96
97static void utf32_to_utf8(uint8_t* dstP, uint32_t srcChar, size_t bytes)
98{
99    dstP += bytes;
100    switch (bytes)
101    {   /* note: everything falls through. */
102        case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
103        case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
104        case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
105        case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]);
106    }
107}
108
109// ---------------------------------------------------------------------------
110
111static SharedBuffer* gEmptyStringBuf = NULL;
112static char* gEmptyString = NULL;
113
114extern int gDarwinCantLoadAllObjects;
115int gDarwinIsReallyAnnoying;
116
117static inline char* getEmptyString()
118{
119    gEmptyStringBuf->acquire();
120    return gEmptyString;
121}
122
123void initialize_string8()
124{
125#ifdef LIBUTILS_NATIVE
126	  // Bite me, Darwin!
127		gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;
128#endif
129
130    SharedBuffer* buf = SharedBuffer::alloc(1);
131    char* str = (char*)buf->data();
132    *str = 0;
133    gEmptyStringBuf = buf;
134    gEmptyString = str;
135}
136
137void terminate_string8()
138{
139    SharedBuffer::bufferFromData(gEmptyString)->release();
140    gEmptyStringBuf = NULL;
141    gEmptyString = NULL;
142}
143
144// ---------------------------------------------------------------------------
145
146static char* allocFromUTF8(const char* in, size_t len)
147{
148    if (len > 0) {
149        SharedBuffer* buf = SharedBuffer::alloc(len+1);
150        LOG_ASSERT(buf, "Unable to allocate shared buffer");
151        if (buf) {
152            char* str = (char*)buf->data();
153            memcpy(str, in, len);
154            str[len] = 0;
155            return str;
156        }
157        return NULL;
158    }
159
160    return getEmptyString();
161}
162
163// Note: not dealing with expanding surrogate pairs.
164static char* allocFromUTF16(const char16_t* in, size_t len)
165{
166    if (len == 0) return getEmptyString();
167
168    size_t bytes = 0;
169    const char16_t* end = in+len;
170    const char16_t* p = in;
171
172    while (p < end) {
173        bytes += utf32_to_utf8_bytes(*p);
174        p++;
175    }
176
177    SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
178    LOG_ASSERT(buf, "Unable to allocate shared buffer");
179    if (buf) {
180        p = in;
181        char* str = (char*)buf->data();
182        char* d = str;
183        while (p < end) {
184            uint32_t c = *p++;
185            size_t len = utf32_to_utf8_bytes(c);
186            utf32_to_utf8((uint8_t*)d, c, len);
187            d += len;
188        }
189        *d = 0;
190
191        return str;
192    }
193
194    return getEmptyString();
195}
196
197// ---------------------------------------------------------------------------
198
199String8::String8()
200    : mString(getEmptyString())
201{
202}
203
204String8::String8(const String8& o)
205    : mString(o.mString)
206{
207    SharedBuffer::bufferFromData(mString)->acquire();
208}
209
210String8::String8(const char* o)
211    : mString(allocFromUTF8(o, strlen(o)))
212{
213    if (mString == NULL) {
214        mString = getEmptyString();
215    }
216}
217
218String8::String8(const char* o, size_t len)
219    : mString(allocFromUTF8(o, len))
220{
221    if (mString == NULL) {
222        mString = getEmptyString();
223    }
224}
225
226String8::String8(const String16& o)
227    : mString(allocFromUTF16(o.string(), o.size()))
228{
229}
230
231String8::String8(const char16_t* o)
232    : mString(allocFromUTF16(o, strlen16(o)))
233{
234}
235
236String8::String8(const char16_t* o, size_t len)
237    : mString(allocFromUTF16(o, len))
238{
239}
240
241String8::~String8()
242{
243    SharedBuffer::bufferFromData(mString)->release();
244}
245
246void String8::setTo(const String8& other)
247{
248    SharedBuffer::bufferFromData(other.mString)->acquire();
249    SharedBuffer::bufferFromData(mString)->release();
250    mString = other.mString;
251}
252
253status_t String8::setTo(const char* other)
254{
255    SharedBuffer::bufferFromData(mString)->release();
256    mString = allocFromUTF8(other, strlen(other));
257    if (mString) return NO_ERROR;
258
259    mString = getEmptyString();
260    return NO_MEMORY;
261}
262
263status_t String8::setTo(const char* other, size_t len)
264{
265    SharedBuffer::bufferFromData(mString)->release();
266    mString = allocFromUTF8(other, len);
267    if (mString) return NO_ERROR;
268
269    mString = getEmptyString();
270    return NO_MEMORY;
271}
272
273status_t String8::setTo(const char16_t* other, size_t len)
274{
275    SharedBuffer::bufferFromData(mString)->release();
276    mString = allocFromUTF16(other, len);
277    if (mString) return NO_ERROR;
278
279    mString = getEmptyString();
280    return NO_MEMORY;
281}
282
283status_t String8::append(const String8& other)
284{
285    const size_t otherLen = other.bytes();
286    if (bytes() == 0) {
287        setTo(other);
288        return NO_ERROR;
289    } else if (otherLen == 0) {
290        return NO_ERROR;
291    }
292
293    return real_append(other.string(), otherLen);
294}
295
296status_t String8::append(const char* other)
297{
298    return append(other, strlen(other));
299}
300
301status_t String8::append(const char* other, size_t otherLen)
302{
303    if (bytes() == 0) {
304        return setTo(other, otherLen);
305    } else if (otherLen == 0) {
306        return NO_ERROR;
307    }
308
309    return real_append(other, otherLen);
310}
311
312status_t String8::real_append(const char* other, size_t otherLen)
313{
314    const size_t myLen = bytes();
315
316    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
317        ->editResize(myLen+otherLen+1);
318    if (buf) {
319        char* str = (char*)buf->data();
320        mString = str;
321        str += myLen;
322        memcpy(str, other, otherLen);
323        str[otherLen] = '\0';
324        return NO_ERROR;
325    }
326    return NO_MEMORY;
327}
328
329char* String8::lockBuffer(size_t size)
330{
331    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
332        ->editResize(size+1);
333    if (buf) {
334        char* str = (char*)buf->data();
335        mString = str;
336        return str;
337    }
338    return NULL;
339}
340
341void String8::unlockBuffer()
342{
343    unlockBuffer(strlen(mString));
344}
345
346status_t String8::unlockBuffer(size_t size)
347{
348    if (size != this->size()) {
349        SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
350            ->editResize(size+1);
351        if (buf) {
352            char* str = (char*)buf->data();
353            str[size] = 0;
354            mString = str;
355            return NO_ERROR;
356        }
357    }
358
359    return NO_MEMORY;
360}
361
362ssize_t String8::find(const char* other, size_t start) const
363{
364    size_t len = size();
365    if (start >= len) {
366        return -1;
367    }
368    const char* s = mString+start;
369    const char* p = strstr(s, other);
370    return p ? p-mString : -1;
371}
372
373void String8::toLower()
374{
375    toLower(0, size());
376}
377
378void String8::toLower(size_t start, size_t length)
379{
380    const size_t len = size();
381    if (start >= len) {
382        return;
383    }
384    if (start+length > len) {
385        length = len-start;
386    }
387    char* buf = lockBuffer(len);
388    buf += start;
389    while (length > 0) {
390        *buf = tolower(*buf);
391        buf++;
392        length--;
393    }
394    unlockBuffer(len);
395}
396
397void String8::toUpper()
398{
399    toUpper(0, size());
400}
401
402void String8::toUpper(size_t start, size_t length)
403{
404    const size_t len = size();
405    if (start >= len) {
406        return;
407    }
408    if (start+length > len) {
409        length = len-start;
410    }
411    char* buf = lockBuffer(len);
412    buf += start;
413    while (length > 0) {
414        *buf = toupper(*buf);
415        buf++;
416        length--;
417    }
418    unlockBuffer(len);
419}
420
421TextOutput& operator<<(TextOutput& to, const String8& val)
422{
423    to << val.string();
424    return to;
425}
426
427// ---------------------------------------------------------------------------
428// Path functions
429
430
431void String8::setPathName(const char* name)
432{
433    setPathName(name, strlen(name));
434}
435
436void String8::setPathName(const char* name, size_t len)
437{
438    char* buf = lockBuffer(len);
439
440    memcpy(buf, name, len);
441
442    // remove trailing path separator, if present
443    if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
444        len--;
445
446    buf[len] = '\0';
447
448    unlockBuffer(len);
449}
450
451String8 String8::getPathLeaf(void) const
452{
453    const char* cp;
454    const char*const buf = mString;
455
456    cp = strrchr(buf, OS_PATH_SEPARATOR);
457    if (cp == NULL)
458        return String8(*this);
459    else
460        return String8(cp+1);
461}
462
463String8 String8::getPathDir(void) const
464{
465    const char* cp;
466    const char*const str = mString;
467
468    cp = strrchr(str, OS_PATH_SEPARATOR);
469    if (cp == NULL)
470        return String8("");
471    else
472        return String8(str, cp - str);
473}
474
475String8 String8::walkPath(String8* outRemains) const
476{
477    const char* cp;
478    const char*const str = mString;
479    const char* buf = str;
480
481    cp = strchr(buf, OS_PATH_SEPARATOR);
482    if (cp == buf) {
483        // don't include a leading '/'.
484        buf = buf+1;
485        cp = strchr(buf, OS_PATH_SEPARATOR);
486    }
487
488    if (cp == NULL) {
489        String8 res = buf != str ? String8(buf) : *this;
490        if (outRemains) *outRemains = String8("");
491        return res;
492    }
493
494    String8 res(buf, cp-buf);
495    if (outRemains) *outRemains = String8(cp+1);
496    return res;
497}
498
499/*
500 * Helper function for finding the start of an extension in a pathname.
501 *
502 * Returns a pointer inside mString, or NULL if no extension was found.
503 */
504char* String8::find_extension(void) const
505{
506    const char* lastSlash;
507    const char* lastDot;
508    int extLen;
509    const char* const str = mString;
510
511    // only look at the filename
512    lastSlash = strrchr(str, OS_PATH_SEPARATOR);
513    if (lastSlash == NULL)
514        lastSlash = str;
515    else
516        lastSlash++;
517
518    // find the last dot
519    lastDot = strrchr(lastSlash, '.');
520    if (lastDot == NULL)
521        return NULL;
522
523    // looks good, ship it
524    return const_cast<char*>(lastDot);
525}
526
527String8 String8::getPathExtension(void) const
528{
529    char* ext;
530
531    ext = find_extension();
532    if (ext != NULL)
533        return String8(ext);
534    else
535        return String8("");
536}
537
538String8 String8::getBasePath(void) const
539{
540    char* ext;
541    const char* const str = mString;
542
543    ext = find_extension();
544    if (ext == NULL)
545        return String8(*this);
546    else
547        return String8(str, ext - str);
548}
549
550String8& String8::appendPath(const char* name)
551{
552    // TODO: The test below will fail for Win32 paths. Fix later or ignore.
553    if (name[0] != OS_PATH_SEPARATOR) {
554        if (*name == '\0') {
555            // nothing to do
556            return *this;
557        }
558
559        size_t len = length();
560        if (len == 0) {
561            // no existing filename, just use the new one
562            setPathName(name);
563            return *this;
564        }
565
566        // make room for oldPath + '/' + newPath
567        int newlen = strlen(name);
568
569        char* buf = lockBuffer(len+1+newlen);
570
571        // insert a '/' if needed
572        if (buf[len-1] != OS_PATH_SEPARATOR)
573            buf[len++] = OS_PATH_SEPARATOR;
574
575        memcpy(buf+len, name, newlen+1);
576        len += newlen;
577
578        unlockBuffer(len);
579
580        return *this;
581    } else {
582        setPathName(name);
583        return *this;
584    }
585}
586
587String8& String8::convertToResPath()
588{
589#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
590    size_t len = length();
591    if (len > 0) {
592        char * buf = lockBuffer(len);
593        for (char * end = buf + len; buf < end; ++buf) {
594            if (*buf == OS_PATH_SEPARATOR)
595                *buf = RES_PATH_SEPARATOR;
596        }
597        unlockBuffer(len);
598    }
599#endif
600    return *this;
601}
602
603
604}; // namespace android
605