String16.cpp revision 19138468caf7050d482dc15f35a344eab11bb756
1/*
2 * Copyright (C) 2005 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <utils/String16.h>
18
19#include <utils/Debug.h>
20#include <utils/Log.h>
21#include <utils/String8.h>
22#include <utils/TextOutput.h>
23#include <utils/threads.h>
24
25#include <private/utils/Static.h>
26
#ifdef HAVE_WINSOCK
// Discard any byte-order macros that are already defined so the replacements
// below can be introduced without clashing with them.
# undef  ntohl
# undef  htonl
# undef  ntohs
# undef  htons

# ifdef HAVE_LITTLE_ENDIAN
// Little-endian host: converting to or from network order is a byte swap, so
// each host-to-network macro simply reuses its network-to-host counterpart.
// Note that the argument is evaluated more than once.
#  define ntohl(x)    ( ((x) << 24) | (((x) >> 24) & 255) | (((x) << 8) & 0xff0000) | (((x) >> 8) & 0xff00) )
#  define htonl(x)    ntohl(x)
#  define ntohs(x)    ( (((x) << 8) & 0xff00) | (((x) >> 8) & 255) )
#  define htons(x)    ntohs(x)
# else
// Otherwise host order is treated as network order: conversions are no-ops.
#  define ntohl(x)    (x)
#  define htonl(x)    (x)
#  define ntohs(x)    (x)
#  define htons(x)    (x)
# endif
#else
# include <netinet/in.h>
#endif
47
48#include <memory.h>
49#include <stdio.h>
50#include <ctype.h>
51
52// ---------------------------------------------------------------------------
53
// Compares two NUL-terminated UTF-16 strings one code unit at a time.
// Returns 0 when the strings are equal; otherwise returns the difference
// (s1 unit minus s2 unit) at the first position where they differ.
int strcmp16(const char16_t *s1, const char16_t *s2)
{
  for (;; s1++, s2++) {
    const int diff = (int)*s1 - (int)*s2;
    // Stop on the first mismatch, or once the shared terminator is reached.
    if (diff != 0 || *s1 == 0)
      return diff;
  }
}
67
// Compares at most n code units of two NUL-terminated UTF-16 strings.
// Returns 0 when the compared prefixes are equal (or n is 0); otherwise the
// difference (s1 unit minus s2 unit) at the first mismatch.
int strncmp16(const char16_t *s1, const char16_t *s2, size_t n)
{
  int diff = 0;

  for (size_t i = 0; i < n; i++) {
    const char16_t unit = s1[i];
    diff = (int)unit - (int)s2[i];
    // A mismatch or the end of s1 ends the comparison early.
    if (diff != 0 || unit == 0)
      break;
  }

  return diff;
}
81
// Copies the NUL-terminated UTF-16 string src, terminator included, into dst.
// dst must have room for strlen16(src)+1 code units. Returns dst.
char16_t *strcpy16(char16_t *dst, const char16_t *src)
{
  size_t i = 0;

  // The assignment copies each unit; the loop ends right after the
  // terminator itself has been stored.
  while ((dst[i] = src[i]) != 0)
    i++;

  return dst;
}
94
// Returns the number of code units in the NUL-terminated UTF-16 string s,
// not counting the terminator.
size_t strlen16(const char16_t *s)
{
  size_t len = 0;
  while (s[len] != 0)
    len++;
  return len;
}
102
103
// Copies at most n code units from the NUL-terminated UTF-16 string src into
// dst and guarantees that dst ends up NUL-terminated.
//
// If src (including its terminator) fits in n units, copying stops right after
// the terminator. Otherwise exactly n units are copied and a terminator is
// stored at dst[n], so dst must have room for n+1 code units.
//
// Returns dst.
char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n)
{
  char16_t *q = dst;
  const char16_t *p = src;

  while (n) {
    n--;
    // Keep the unit at full 16-bit width: narrowing it to char would corrupt
    // the copied data and mistake units such as 0x0100 for the terminator.
    const char16_t ch = *p++;
    *q++ = ch;
    if ( !ch )
      return dst;  // terminator already copied, nothing more to write
  }

  // src was longer than n units: terminate the truncated copy.
  *q = 0;

  return dst;
}
121
// Returns the length of the UTF-16 string s in code units, but never looks at
// more than maxlen units; the result is maxlen when no terminator is found
// within that range.
size_t strnlen16(const char16_t *s, size_t maxlen)
{
  size_t len = 0;

  /* Important: the bound must be checked before s[len] is read, since the
     unit just beyond the maximum may not be accessible. */
  while (len < maxlen && s[len] != 0)
    len++;

  return len;
}
134
// Compares two length-delimited UTF-16 strings (n1 and n2 code units).
//
// The common prefix is compared unit by unit and the first difference
// (s1 unit minus s2 unit) is returned. If the common prefix matches, the
// result is 0 for equal lengths; otherwise the first extra unit of the longer
// string decides: negated when s2 is longer, as-is when s1 is longer.
int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2)
{
    const size_t common = (n1 < n2) ? n1 : n2;

    for (size_t i = 0; i < common; i++) {
        const int d = (int)s1[i] - (int)s2[i];
        if (d) {
            return d;
        }
    }

    if (n1 < n2) {
        return 0 - (int)s2[common];
    }
    if (n1 > n2) {
        return (int)s1[common] - 0;
    }
    return 0;
}
153
// Like strzcmp16(), but the second string is stored in network byte order:
// every unit of s2N is passed through ntohs() before it is compared with the
// host-order units of s1H.
//
// Returns the first difference (s1H unit minus converted s2N unit) in the
// common prefix. If the common prefix matches, returns 0 for equal lengths,
// the negated first extra unit of s2N when it is longer, or the first extra
// unit of s1H when that one is longer.
int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2)
{
    const size_t common = (n1 < n2) ? n1 : n2;

    for (size_t i = 0; i < common; i++) {
        const char16_t hostUnit = ntohs(s2N[i]);
        const int d = (int)s1H[i] - (int)hostUnit;
        if (d) {
            return d;
        }
    }

    if (n1 < n2) {
        return 0 - (int)ntohs(s2N[common]);
    }
    if (n1 > n2) {
        return (int)s1H[common] - 0;
    }
    return 0;
}
174
// Returns the length in bytes (1..4) of the UTF-8 sequence introduced by the
// byte ch.
//
// 0xe5000000 is a packed table of 2-bit entries selected by the top four bits
// of ch: bytes below 0xC0 (ASCII and continuation bytes) give 1, 0xC0-0xDF
// give 2, 0xE0-0xEF give 3 and 0xF0-0xFF give 4.
static inline size_t
utf8_char_len(uint8_t ch)
{
    return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1;
}

// Appends the low six payload bits of a continuation byte to `unicode`.
// Expands to two statements, so it must not be used as a single expression.
#define UTF8_SHIFT_AND_MASK(unicode, byte)  (unicode)<<=6; (unicode) |= (0x3f & (byte));

// Decodes one UTF-8 sequence of `length` bytes starting at src and returns the
// code point. The caller must make sure `length` bytes are readable. Any
// length outside 1..4 yields 0xffff. Continuation bytes are not validated.
static inline uint32_t
utf8_to_utf32(const uint8_t *src, size_t length)
{
    uint32_t unicode;

    switch (length)
    {
        case 1:
            return src[0];
        case 2:
            unicode = src[0] & 0x1f;
            UTF8_SHIFT_AND_MASK(unicode, src[1])
            return unicode;
        case 3:
            unicode = src[0] & 0x0f;
            UTF8_SHIFT_AND_MASK(unicode, src[1])
            UTF8_SHIFT_AND_MASK(unicode, src[2])
            return unicode;
        case 4:
            unicode = src[0] & 0x07;
            UTF8_SHIFT_AND_MASK(unicode, src[1])
            UTF8_SHIFT_AND_MASK(unicode, src[2])
            UTF8_SHIFT_AND_MASK(unicode, src[3])
            return unicode;
        default:
            return 0xffff;
    }
}

// Converts srcLen bytes of UTF-8 at src into UTF-16 code units at dst.
//
// dstLen is the capacity of dst in code units (not bytes). Conversion stops
// when the input is exhausted, when the next character does not fit in the
// remaining output space (a surrogate pair needs two units), or when the
// input ends in the middle of a multi-byte sequence. A terminating 0 is
// stored after the converted units if there is still room for it.
void
utf8_to_utf16(const uint8_t *src, size_t srcLen,
        char16_t* dst, const size_t dstLen)
{
    const uint8_t* const end = src + srcLen;
    const char16_t* const dstEnd = dst + dstLen;
    while (src < end && dst < dstEnd) {
        const size_t len = utf8_char_len(*src);
        if ((size_t)(end - src) < len) {
            // Truncated trailing sequence: decoding it would read past the
            // end of the input, so stop here.
            break;
        }
        uint32_t codepoint = utf8_to_utf32(src, len);

        // Convert the UTF32 codepoint to one or more UTF16 codepoints
        if (codepoint <= 0xFFFF) {
            // Single UTF16 character
            *dst++ = (char16_t) codepoint;
        } else {
            // Multiple UTF16 characters with surrogates; both halves must fit
            // or neither is written.
            if ((dstEnd - dst) < 2) {
                break;
            }
            codepoint = codepoint - 0x10000;
            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
        }

        src += len;
    }
    if (dst < dstEnd) {
        *dst = 0;
    }
}
241
242// ---------------------------------------------------------------------------
243
244namespace android {
245
246static SharedBuffer* gEmptyStringBuf = NULL;
247static char16_t* gEmptyString = NULL;
248
249static inline char16_t* getEmptyString()
250{
251    gEmptyStringBuf->acquire();
252   return gEmptyString;
253}
254
255void initialize_string16()
256{
257    SharedBuffer* buf = SharedBuffer::alloc(sizeof(char16_t));
258    char16_t* str = (char16_t*)buf->data();
259    *str = 0;
260    gEmptyStringBuf = buf;
261    gEmptyString = str;
262}
263
264void terminate_string16()
265{
266    SharedBuffer::bufferFromData(gEmptyString)->release();
267    gEmptyStringBuf = NULL;
268    gEmptyString = NULL;
269}
270
271// ---------------------------------------------------------------------------
272
273static char16_t* allocFromUTF8(const char* in, size_t len)
274{
275    if (len == 0) return getEmptyString();
276
277    size_t chars = 0;
278    const char* end = in+len;
279    const char* p = in;
280
281    while (p < end) {
282        chars++;
283        int utf8len = utf8_char_len(*p);
284        uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, utf8len);
285        if (codepoint > 0xFFFF) chars++; // this will be a surrogate pair in utf16
286        p += utf8len;
287    }
288
289    size_t bufSize = (chars+1)*sizeof(char16_t);
290    SharedBuffer* buf = SharedBuffer::alloc(bufSize);
291    if (buf) {
292        p = in;
293        char16_t* str = (char16_t*)buf->data();
294
295        utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
296
297        //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
298        //printHexData(1, str, buf->size(), 16, 1);
299        //printf("\n");
300
301        return str;
302    }
303
304    return getEmptyString();
305}
306
307// ---------------------------------------------------------------------------
308
309String16::String16()
310    : mString(getEmptyString())
311{
312}
313
314String16::String16(const String16& o)
315    : mString(o.mString)
316{
317    SharedBuffer::bufferFromData(mString)->acquire();
318}
319
320String16::String16(const String16& o, size_t len, size_t begin)
321    : mString(getEmptyString())
322{
323    setTo(o, len, begin);
324}
325
326String16::String16(const char16_t* o)
327{
328    size_t len = strlen16(o);
329    SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t));
330    LOG_ASSERT(buf, "Unable to allocate shared buffer");
331    if (buf) {
332        char16_t* str = (char16_t*)buf->data();
333        strcpy16(str, o);
334        mString = str;
335        return;
336    }
337
338    mString = getEmptyString();
339}
340
341String16::String16(const char16_t* o, size_t len)
342{
343    SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t));
344    LOG_ASSERT(buf, "Unable to allocate shared buffer");
345    if (buf) {
346        char16_t* str = (char16_t*)buf->data();
347        memcpy(str, o, len*sizeof(char16_t));
348        str[len] = 0;
349        mString = str;
350        return;
351    }
352
353    mString = getEmptyString();
354}
355
356String16::String16(const String8& o)
357    : mString(allocFromUTF8(o.string(), o.size()))
358{
359}
360
361String16::String16(const char* o)
362    : mString(allocFromUTF8(o, strlen(o)))
363{
364}
365
366String16::String16(const char* o, size_t len)
367    : mString(allocFromUTF8(o, len))
368{
369}
370
371String16::~String16()
372{
373    SharedBuffer::bufferFromData(mString)->release();
374}
375
376void String16::setTo(const String16& other)
377{
378    SharedBuffer::bufferFromData(other.mString)->acquire();
379    SharedBuffer::bufferFromData(mString)->release();
380    mString = other.mString;
381}
382
383status_t String16::setTo(const String16& other, size_t len, size_t begin)
384{
385    const size_t N = other.size();
386    if (begin >= N) {
387        SharedBuffer::bufferFromData(mString)->release();
388        mString = getEmptyString();
389        return NO_ERROR;
390    }
391    if ((begin+len) > N) len = N-begin;
392    if (begin == 0 && len == N) {
393        setTo(other);
394        return NO_ERROR;
395    }
396
397    if (&other == this) {
398        LOG_ALWAYS_FATAL("Not implemented");
399    }
400
401    return setTo(other.string()+begin, len);
402}
403
404status_t String16::setTo(const char16_t* other)
405{
406    return setTo(other, strlen16(other));
407}
408
409status_t String16::setTo(const char16_t* other, size_t len)
410{
411    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
412        ->editResize((len+1)*sizeof(char16_t));
413    if (buf) {
414        char16_t* str = (char16_t*)buf->data();
415        memmove(str, other, len*sizeof(char16_t));
416        str[len] = 0;
417        mString = str;
418        return NO_ERROR;
419    }
420    return NO_MEMORY;
421}
422
423status_t String16::append(const String16& other)
424{
425    const size_t myLen = size();
426    const size_t otherLen = other.size();
427    if (myLen == 0) {
428        setTo(other);
429        return NO_ERROR;
430    } else if (otherLen == 0) {
431        return NO_ERROR;
432    }
433
434    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
435        ->editResize((myLen+otherLen+1)*sizeof(char16_t));
436    if (buf) {
437        char16_t* str = (char16_t*)buf->data();
438        memcpy(str+myLen, other, (otherLen+1)*sizeof(char16_t));
439        mString = str;
440        return NO_ERROR;
441    }
442    return NO_MEMORY;
443}
444
445status_t String16::append(const char16_t* chrs, size_t otherLen)
446{
447    const size_t myLen = size();
448    if (myLen == 0) {
449        setTo(chrs, otherLen);
450        return NO_ERROR;
451    } else if (otherLen == 0) {
452        return NO_ERROR;
453    }
454
455    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
456        ->editResize((myLen+otherLen+1)*sizeof(char16_t));
457    if (buf) {
458        char16_t* str = (char16_t*)buf->data();
459        memcpy(str+myLen, chrs, otherLen*sizeof(char16_t));
460        str[myLen+otherLen] = 0;
461        mString = str;
462        return NO_ERROR;
463    }
464    return NO_MEMORY;
465}
466
467status_t String16::insert(size_t pos, const char16_t* chrs)
468{
469    return insert(pos, chrs, strlen16(chrs));
470}
471
472status_t String16::insert(size_t pos, const char16_t* chrs, size_t len)
473{
474    const size_t myLen = size();
475    if (myLen == 0) {
476        return setTo(chrs, len);
477        return NO_ERROR;
478    } else if (len == 0) {
479        return NO_ERROR;
480    }
481
482    if (pos > myLen) pos = myLen;
483
484    #if 0
485    printf("Insert in to %s: pos=%d, len=%d, myLen=%d, chrs=%s\n",
486           String8(*this).string(), pos,
487           len, myLen, String8(chrs, len).string());
488    #endif
489
490    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
491        ->editResize((myLen+len+1)*sizeof(char16_t));
492    if (buf) {
493        char16_t* str = (char16_t*)buf->data();
494        if (pos < myLen) {
495            memmove(str+pos+len, str+pos, (myLen-pos)*sizeof(char16_t));
496        }
497        memcpy(str+pos, chrs, len*sizeof(char16_t));
498        str[myLen+len] = 0;
499        mString = str;
500        #if 0
501        printf("Result (%d chrs): %s\n", size(), String8(*this).string());
502        #endif
503        return NO_ERROR;
504    }
505    return NO_MEMORY;
506}
507
508ssize_t String16::findFirst(char16_t c) const
509{
510    const char16_t* str = string();
511    const char16_t* p = str;
512    const char16_t* e = p + size();
513    while (p < e) {
514        if (*p == c) {
515            return p-str;
516        }
517        p++;
518    }
519    return -1;
520}
521
522ssize_t String16::findLast(char16_t c) const
523{
524    const char16_t* str = string();
525    const char16_t* p = str;
526    const char16_t* e = p + size();
527    while (p < e) {
528        e--;
529        if (*e == c) {
530            return e-str;
531        }
532    }
533    return -1;
534}
535
536bool String16::startsWith(const String16& prefix) const
537{
538    const size_t ps = prefix.size();
539    if (ps > size()) return false;
540    return strzcmp16(mString, ps, prefix.string(), ps) == 0;
541}
542
543bool String16::startsWith(const char16_t* prefix) const
544{
545    const size_t ps = strlen16(prefix);
546    if (ps > size()) return false;
547    return strncmp16(mString, prefix, ps) == 0;
548}
549
550status_t String16::makeLower()
551{
552    const size_t N = size();
553    const char16_t* str = string();
554    char16_t* edit = NULL;
555    for (size_t i=0; i<N; i++) {
556        const char16_t v = str[i];
557        if (v >= 'A' && v <= 'Z') {
558            if (!edit) {
559                SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit();
560                if (!buf) {
561                    return NO_MEMORY;
562                }
563                edit = (char16_t*)buf->data();
564                mString = str = edit;
565            }
566            edit[i] = tolower((char)v);
567        }
568    }
569    return NO_ERROR;
570}
571
572status_t String16::replaceAll(char16_t replaceThis, char16_t withThis)
573{
574    const size_t N = size();
575    const char16_t* str = string();
576    char16_t* edit = NULL;
577    for (size_t i=0; i<N; i++) {
578        if (str[i] == replaceThis) {
579            if (!edit) {
580                SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit();
581                if (!buf) {
582                    return NO_MEMORY;
583                }
584                edit = (char16_t*)buf->data();
585                mString = str = edit;
586            }
587            edit[i] = withThis;
588        }
589    }
590    return NO_ERROR;
591}
592
593status_t String16::remove(size_t len, size_t begin)
594{
595    const size_t N = size();
596    if (begin >= N) {
597        SharedBuffer::bufferFromData(mString)->release();
598        mString = getEmptyString();
599        return NO_ERROR;
600    }
601    if ((begin+len) > N) len = N-begin;
602    if (begin == 0 && len == N) {
603        return NO_ERROR;
604    }
605
606    if (begin > 0) {
607        SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
608            ->editResize((N+1)*sizeof(char16_t));
609        if (!buf) {
610            return NO_MEMORY;
611        }
612        char16_t* str = (char16_t*)buf->data();
613        memmove(str, str+begin, (N-begin+1)*sizeof(char16_t));
614        mString = str;
615    }
616    SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
617        ->editResize((len+1)*sizeof(char16_t));
618    if (buf) {
619        char16_t* str = (char16_t*)buf->data();
620        str[len] = 0;
621        mString = str;
622        return NO_ERROR;
623    }
624    return NO_MEMORY;
625}
626
627TextOutput& operator<<(TextOutput& to, const String16& val)
628{
629    to << String8(val).string();
630    return to;
631}
632
633}; // namespace android
634