// String8.cpp revision 92f5984d2c2cd73b6b9f68c02c147877d1e2fc46
1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <utils/String8.h> 18 19#include <utils/Log.h> 20#include <utils/String16.h> 21#include <utils/TextOutput.h> 22#include <utils/threads.h> 23 24#include <private/utils/Static.h> 25 26#include <ctype.h> 27 28/* 29 * Functions outside android is below the namespace android, since they use 30 * functions and constants in android namespace. 31 */ 32 33// --------------------------------------------------------------------------- 34 35namespace android { 36 37static const char32_t kByteMask = 0x000000BF; 38static const char32_t kByteMark = 0x00000080; 39 40// Surrogates aren't valid for UTF-32 characters, so define some 41// constants that will let us screen them out. 42static const char32_t kUnicodeSurrogateHighStart = 0x0000D800; 43static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; 44static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00; 45static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; 46static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; 47static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; 48static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF; 49 50// Mask used to set appropriate bits in first byte of UTF-8 sequence, 51// indexed by number of bytes in the sequence. 52// 0xxxxxxx 53// -> (00-7f) 7bit. 
Bit mask for the first byte is 0x00000000 54// 110yyyyx 10xxxxxx 55// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0 56// 1110yyyy 10yxxxxx 10xxxxxx 57// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0 58// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx 59// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0 60static const char32_t kFirstByteMark[] = { 61 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 62}; 63 64// Separator used by resource paths. This is not platform dependent contrary 65// to OS_PATH_SEPARATOR. 66#define RES_PATH_SEPARATOR '/' 67 68// Return number of utf8 bytes required for the character. 69static size_t utf32_to_utf8_bytes(char32_t srcChar) 70{ 71 size_t bytesToWrite; 72 73 // Figure out how many bytes the result will require. 74 if (srcChar < 0x00000080) 75 { 76 bytesToWrite = 1; 77 } 78 else if (srcChar < 0x00000800) 79 { 80 bytesToWrite = 2; 81 } 82 else if (srcChar < 0x00010000) 83 { 84 if ((srcChar < kUnicodeSurrogateStart) 85 || (srcChar > kUnicodeSurrogateEnd)) 86 { 87 bytesToWrite = 3; 88 } 89 else 90 { 91 // Surrogates are invalid UTF-32 characters. 92 return 0; 93 } 94 } 95 // Max code point for Unicode is 0x0010FFFF. 96 else if (srcChar <= kUnicodeMaxCodepoint) 97 { 98 bytesToWrite = 4; 99 } 100 else 101 { 102 // Invalid UTF-32 character. 103 return 0; 104 } 105 106 return bytesToWrite; 107} 108 109// Write out the source character to <dstP>. 110 111static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes) 112{ 113 dstP += bytes; 114 switch (bytes) 115 { /* note: everything falls through. 
*/ 116 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 117 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 118 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 119 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); 120 } 121} 122 123// --------------------------------------------------------------------------- 124 125static SharedBuffer* gEmptyStringBuf = NULL; 126static char* gEmptyString = NULL; 127 128extern int gDarwinCantLoadAllObjects; 129int gDarwinIsReallyAnnoying; 130 131static inline char* getEmptyString() 132{ 133 gEmptyStringBuf->acquire(); 134 return gEmptyString; 135} 136 137void initialize_string8() 138{ 139#ifdef LIBUTILS_NATIVE 140 // Bite me, Darwin! 141 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects; 142#endif 143 144 SharedBuffer* buf = SharedBuffer::alloc(1); 145 char* str = (char*)buf->data(); 146 *str = 0; 147 gEmptyStringBuf = buf; 148 gEmptyString = str; 149} 150 151void terminate_string8() 152{ 153 SharedBuffer::bufferFromData(gEmptyString)->release(); 154 gEmptyStringBuf = NULL; 155 gEmptyString = NULL; 156} 157 158// --------------------------------------------------------------------------- 159 160static char* allocFromUTF8(const char* in, size_t len) 161{ 162 if (len > 0) { 163 SharedBuffer* buf = SharedBuffer::alloc(len+1); 164 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 165 if (buf) { 166 char* str = (char*)buf->data(); 167 memcpy(str, in, len); 168 str[len] = 0; 169 return str; 170 } 171 return NULL; 172 } 173 174 return getEmptyString(); 175} 176 177template<typename T, typename L> 178static char* allocFromUTF16OrUTF32(const T* in, L len) 179{ 180 if (len == 0) return getEmptyString(); 181 182 size_t bytes = 0; 183 const T* end = in+len; 184 const T* p = in; 185 186 while (p < end) { 187 bytes += utf32_to_utf8_bytes(*p); 188 p++; 189 } 190 191 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 192 LOG_ASSERT(buf, 
"Unable to allocate shared buffer"); 193 if (buf) { 194 p = in; 195 char* str = (char*)buf->data(); 196 char* d = str; 197 while (p < end) { 198 const T c = *p++; 199 size_t len = utf32_to_utf8_bytes(c); 200 utf32_to_utf8((uint8_t*)d, c, len); 201 d += len; 202 } 203 *d = 0; 204 205 return str; 206 } 207 208 return getEmptyString(); 209} 210 211static char* allocFromUTF16(const char16_t* in, size_t len) 212{ 213 if (len == 0) return getEmptyString(); 214 215 const size_t bytes = utf8_length_from_utf16(in, len); 216 217 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 218 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 219 if (buf) { 220 char* str = (char*)buf->data(); 221 222 utf16_to_utf8(in, len, str, bytes+1); 223 224 return str; 225 } 226 227 return getEmptyString(); 228} 229 230static char* allocFromUTF32(const char32_t* in, size_t len) 231{ 232 return allocFromUTF16OrUTF32<char32_t, size_t>(in, len); 233} 234 235// --------------------------------------------------------------------------- 236 237String8::String8() 238 : mString(getEmptyString()) 239{ 240} 241 242String8::String8(const String8& o) 243 : mString(o.mString) 244{ 245 SharedBuffer::bufferFromData(mString)->acquire(); 246} 247 248String8::String8(const char* o) 249 : mString(allocFromUTF8(o, strlen(o))) 250{ 251 if (mString == NULL) { 252 mString = getEmptyString(); 253 } 254} 255 256String8::String8(const char* o, size_t len) 257 : mString(allocFromUTF8(o, len)) 258{ 259 if (mString == NULL) { 260 mString = getEmptyString(); 261 } 262} 263 264String8::String8(const String16& o) 265 : mString(allocFromUTF16(o.string(), o.size())) 266{ 267} 268 269String8::String8(const char16_t* o) 270 : mString(allocFromUTF16(o, strlen16(o))) 271{ 272} 273 274String8::String8(const char16_t* o, size_t len) 275 : mString(allocFromUTF16(o, len)) 276{ 277} 278 279String8::String8(const char32_t* o) 280 : mString(allocFromUTF32(o, strlen32(o))) 281{ 282} 283 284String8::String8(const char32_t* o, size_t len) 
285 : mString(allocFromUTF32(o, len)) 286{ 287} 288 289String8::~String8() 290{ 291 SharedBuffer::bufferFromData(mString)->release(); 292} 293 294void String8::setTo(const String8& other) 295{ 296 SharedBuffer::bufferFromData(other.mString)->acquire(); 297 SharedBuffer::bufferFromData(mString)->release(); 298 mString = other.mString; 299} 300 301status_t String8::setTo(const char* other) 302{ 303 SharedBuffer::bufferFromData(mString)->release(); 304 mString = allocFromUTF8(other, strlen(other)); 305 if (mString) return NO_ERROR; 306 307 mString = getEmptyString(); 308 return NO_MEMORY; 309} 310 311status_t String8::setTo(const char* other, size_t len) 312{ 313 SharedBuffer::bufferFromData(mString)->release(); 314 mString = allocFromUTF8(other, len); 315 if (mString) return NO_ERROR; 316 317 mString = getEmptyString(); 318 return NO_MEMORY; 319} 320 321status_t String8::setTo(const char16_t* other, size_t len) 322{ 323 SharedBuffer::bufferFromData(mString)->release(); 324 mString = allocFromUTF16(other, len); 325 if (mString) return NO_ERROR; 326 327 mString = getEmptyString(); 328 return NO_MEMORY; 329} 330 331status_t String8::setTo(const char32_t* other, size_t len) 332{ 333 SharedBuffer::bufferFromData(mString)->release(); 334 mString = allocFromUTF32(other, len); 335 if (mString) return NO_ERROR; 336 337 mString = getEmptyString(); 338 return NO_MEMORY; 339} 340 341status_t String8::append(const String8& other) 342{ 343 const size_t otherLen = other.bytes(); 344 if (bytes() == 0) { 345 setTo(other); 346 return NO_ERROR; 347 } else if (otherLen == 0) { 348 return NO_ERROR; 349 } 350 351 return real_append(other.string(), otherLen); 352} 353 354status_t String8::append(const char* other) 355{ 356 return append(other, strlen(other)); 357} 358 359status_t String8::append(const char* other, size_t otherLen) 360{ 361 if (bytes() == 0) { 362 return setTo(other, otherLen); 363 } else if (otherLen == 0) { 364 return NO_ERROR; 365 } 366 367 return real_append(other, 
otherLen); 368} 369 370status_t String8::real_append(const char* other, size_t otherLen) 371{ 372 const size_t myLen = bytes(); 373 374 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 375 ->editResize(myLen+otherLen+1); 376 if (buf) { 377 char* str = (char*)buf->data(); 378 mString = str; 379 str += myLen; 380 memcpy(str, other, otherLen); 381 str[otherLen] = '\0'; 382 return NO_ERROR; 383 } 384 return NO_MEMORY; 385} 386 387char* String8::lockBuffer(size_t size) 388{ 389 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 390 ->editResize(size+1); 391 if (buf) { 392 char* str = (char*)buf->data(); 393 mString = str; 394 return str; 395 } 396 return NULL; 397} 398 399void String8::unlockBuffer() 400{ 401 unlockBuffer(strlen(mString)); 402} 403 404status_t String8::unlockBuffer(size_t size) 405{ 406 if (size != this->size()) { 407 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 408 ->editResize(size+1); 409 if (buf) { 410 char* str = (char*)buf->data(); 411 str[size] = 0; 412 mString = str; 413 return NO_ERROR; 414 } 415 } 416 417 return NO_MEMORY; 418} 419 420ssize_t String8::find(const char* other, size_t start) const 421{ 422 size_t len = size(); 423 if (start >= len) { 424 return -1; 425 } 426 const char* s = mString+start; 427 const char* p = strstr(s, other); 428 return p ? 
p-mString : -1; 429} 430 431void String8::toLower() 432{ 433 toLower(0, size()); 434} 435 436void String8::toLower(size_t start, size_t length) 437{ 438 const size_t len = size(); 439 if (start >= len) { 440 return; 441 } 442 if (start+length > len) { 443 length = len-start; 444 } 445 char* buf = lockBuffer(len); 446 buf += start; 447 while (length > 0) { 448 *buf = tolower(*buf); 449 buf++; 450 length--; 451 } 452 unlockBuffer(len); 453} 454 455void String8::toUpper() 456{ 457 toUpper(0, size()); 458} 459 460void String8::toUpper(size_t start, size_t length) 461{ 462 const size_t len = size(); 463 if (start >= len) { 464 return; 465 } 466 if (start+length > len) { 467 length = len-start; 468 } 469 char* buf = lockBuffer(len); 470 buf += start; 471 while (length > 0) { 472 *buf = toupper(*buf); 473 buf++; 474 length--; 475 } 476 unlockBuffer(len); 477} 478 479size_t String8::getUtf32Length() const 480{ 481 return utf32_length(mString, length()); 482} 483 484int32_t String8::getUtf32At(size_t index, size_t *next_index) const 485{ 486 return utf32_at(mString, length(), index, next_index); 487} 488 489size_t String8::getUtf32(char32_t* dst, size_t dst_len) const 490{ 491 return utf8_to_utf32(mString, length(), dst, dst_len); 492} 493 494TextOutput& operator<<(TextOutput& to, const String8& val) 495{ 496 to << val.string(); 497 return to; 498} 499 500// --------------------------------------------------------------------------- 501// Path functions 502 503void String8::setPathName(const char* name) 504{ 505 setPathName(name, strlen(name)); 506} 507 508void String8::setPathName(const char* name, size_t len) 509{ 510 char* buf = lockBuffer(len); 511 512 memcpy(buf, name, len); 513 514 // remove trailing path separator, if present 515 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR) 516 len--; 517 518 buf[len] = '\0'; 519 520 unlockBuffer(len); 521} 522 523String8 String8::getPathLeaf(void) const 524{ 525 const char* cp; 526 const char*const buf = mString; 527 528 cp = 
strrchr(buf, OS_PATH_SEPARATOR); 529 if (cp == NULL) 530 return String8(*this); 531 else 532 return String8(cp+1); 533} 534 535String8 String8::getPathDir(void) const 536{ 537 const char* cp; 538 const char*const str = mString; 539 540 cp = strrchr(str, OS_PATH_SEPARATOR); 541 if (cp == NULL) 542 return String8(""); 543 else 544 return String8(str, cp - str); 545} 546 547String8 String8::walkPath(String8* outRemains) const 548{ 549 const char* cp; 550 const char*const str = mString; 551 const char* buf = str; 552 553 cp = strchr(buf, OS_PATH_SEPARATOR); 554 if (cp == buf) { 555 // don't include a leading '/'. 556 buf = buf+1; 557 cp = strchr(buf, OS_PATH_SEPARATOR); 558 } 559 560 if (cp == NULL) { 561 String8 res = buf != str ? String8(buf) : *this; 562 if (outRemains) *outRemains = String8(""); 563 return res; 564 } 565 566 String8 res(buf, cp-buf); 567 if (outRemains) *outRemains = String8(cp+1); 568 return res; 569} 570 571/* 572 * Helper function for finding the start of an extension in a pathname. 573 * 574 * Returns a pointer inside mString, or NULL if no extension was found. 
575 */ 576char* String8::find_extension(void) const 577{ 578 const char* lastSlash; 579 const char* lastDot; 580 int extLen; 581 const char* const str = mString; 582 583 // only look at the filename 584 lastSlash = strrchr(str, OS_PATH_SEPARATOR); 585 if (lastSlash == NULL) 586 lastSlash = str; 587 else 588 lastSlash++; 589 590 // find the last dot 591 lastDot = strrchr(lastSlash, '.'); 592 if (lastDot == NULL) 593 return NULL; 594 595 // looks good, ship it 596 return const_cast<char*>(lastDot); 597} 598 599String8 String8::getPathExtension(void) const 600{ 601 char* ext; 602 603 ext = find_extension(); 604 if (ext != NULL) 605 return String8(ext); 606 else 607 return String8(""); 608} 609 610String8 String8::getBasePath(void) const 611{ 612 char* ext; 613 const char* const str = mString; 614 615 ext = find_extension(); 616 if (ext == NULL) 617 return String8(*this); 618 else 619 return String8(str, ext - str); 620} 621 622String8& String8::appendPath(const char* name) 623{ 624 // TODO: The test below will fail for Win32 paths. Fix later or ignore. 
625 if (name[0] != OS_PATH_SEPARATOR) { 626 if (*name == '\0') { 627 // nothing to do 628 return *this; 629 } 630 631 size_t len = length(); 632 if (len == 0) { 633 // no existing filename, just use the new one 634 setPathName(name); 635 return *this; 636 } 637 638 // make room for oldPath + '/' + newPath 639 int newlen = strlen(name); 640 641 char* buf = lockBuffer(len+1+newlen); 642 643 // insert a '/' if needed 644 if (buf[len-1] != OS_PATH_SEPARATOR) 645 buf[len++] = OS_PATH_SEPARATOR; 646 647 memcpy(buf+len, name, newlen+1); 648 len += newlen; 649 650 unlockBuffer(len); 651 652 return *this; 653 } else { 654 setPathName(name); 655 return *this; 656 } 657} 658 659String8& String8::convertToResPath() 660{ 661#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR 662 size_t len = length(); 663 if (len > 0) { 664 char * buf = lockBuffer(len); 665 for (char * end = buf + len; buf < end; ++buf) { 666 if (*buf == OS_PATH_SEPARATOR) 667 *buf = RES_PATH_SEPARATOR; 668 } 669 unlockBuffer(len); 670 } 671#endif 672 return *this; 673} 674 675}; // namespace android 676 677// --------------------------------------------------------------------------- 678 679size_t strlen32(const char32_t *s) 680{ 681 const char32_t *ss = s; 682 while ( *ss ) 683 ss++; 684 return ss-s; 685} 686 687size_t strnlen32(const char32_t *s, size_t maxlen) 688{ 689 const char32_t *ss = s; 690 while ((maxlen > 0) && *ss) { 691 ss++; 692 maxlen--; 693 } 694 return ss-s; 695} 696 697size_t utf8_length(const char *src) 698{ 699 const char *cur = src; 700 size_t ret = 0; 701 while (*cur != '\0') { 702 const char first_char = *cur++; 703 if ((first_char & 0x80) == 0) { // ASCII 704 ret += 1; 705 continue; 706 } 707 // (UTF-8's character must not be like 10xxxxxx, 708 // but 110xxxxx, 1110xxxx, ... 
or 1111110x) 709 if ((first_char & 0x40) == 0) { 710 return 0; 711 } 712 713 int32_t mask, to_ignore_mask; 714 size_t num_to_read = 0; 715 char32_t utf32 = 0; 716 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; 717 num_to_read < 5 && (first_char & mask); 718 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 719 if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx 720 return 0; 721 } 722 // 0x3F == 00111111 723 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 724 } 725 // "first_char" must be (110xxxxx - 11110xxx) 726 if (num_to_read == 5) { 727 return 0; 728 } 729 to_ignore_mask |= mask; 730 utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); 731 if (utf32 > android::kUnicodeMaxCodepoint) { 732 return 0; 733 } 734 735 ret += num_to_read; 736 } 737 return ret; 738} 739 740size_t utf32_length(const char *src, size_t src_len) 741{ 742 if (src == NULL || src_len == 0) { 743 return 0; 744 } 745 size_t ret = 0; 746 const char* cur; 747 const char* end; 748 size_t num_to_skip; 749 for (cur = src, end = src + src_len, num_to_skip = 1; 750 cur < end; 751 cur += num_to_skip, ret++) { 752 const char first_char = *cur; 753 num_to_skip = 1; 754 if ((first_char & 0x80) == 0) { // ASCII 755 continue; 756 } 757 int32_t mask; 758 759 for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) { 760 } 761 } 762 return ret; 763} 764 765size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) 766{ 767 if (src == NULL || src_len == 0) { 768 return 0; 769 } 770 size_t ret = 0; 771 const char32_t *end = src + src_len; 772 while (src < end) { 773 ret += android::utf32_to_utf8_bytes(*src++); 774 } 775 return ret; 776} 777 778size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) 779{ 780 if (src == NULL || src_len == 0) { 781 return 0; 782 } 783 size_t ret = 0; 784 const char16_t* const end = src + src_len; 785 while (src < end) { 786 if ((*src & 0xFC00) == 0xD800 && (src + 1) < end 787 && (*++src & 0xFC00) == 0xDC00) { 788 // surrogate pairs 
are always 4 bytes. 789 ret += 4; 790 src++; 791 } else { 792 ret += android::utf32_to_utf8_bytes((char32_t) *src++); 793 } 794 } 795 return ret; 796} 797 798static int32_t utf32_at_internal(const char* cur, size_t *num_read) 799{ 800 const char first_char = *cur; 801 if ((first_char & 0x80) == 0) { // ASCII 802 *num_read = 1; 803 return *cur; 804 } 805 cur++; 806 char32_t mask, to_ignore_mask; 807 size_t num_to_read = 0; 808 char32_t utf32 = first_char; 809 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80; 810 (first_char & mask); 811 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 812 // 0x3F == 00111111 813 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 814 } 815 to_ignore_mask |= mask; 816 utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1))); 817 818 *num_read = num_to_read; 819 return static_cast<int32_t>(utf32); 820} 821 822int32_t utf32_at(const char *src, size_t src_len, 823 size_t index, size_t *next_index) 824{ 825 if (index >= src_len) { 826 return -1; 827 } 828 size_t dummy_index; 829 if (next_index == NULL) { 830 next_index = &dummy_index; 831 } 832 size_t num_read; 833 int32_t ret = utf32_at_internal(src + index, &num_read); 834 if (ret >= 0) { 835 *next_index = index + num_read; 836 } 837 838 return ret; 839} 840 841size_t utf8_to_utf32(const char* src, size_t src_len, 842 char32_t* dst, size_t dst_len) 843{ 844 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 845 return 0; 846 } 847 848 const char* cur = src; 849 const char* end = src + src_len; 850 char32_t* cur_utf32 = dst; 851 const char32_t* end_utf32 = dst + dst_len; 852 while (cur_utf32 < end_utf32 && cur < end) { 853 size_t num_read; 854 *cur_utf32++ = 855 static_cast<char32_t>(utf32_at_internal(cur, &num_read)); 856 cur += num_read; 857 } 858 if (cur_utf32 < end_utf32) { 859 *cur_utf32 = 0; 860 } 861 return static_cast<size_t>(cur_utf32 - dst); 862} 863 864size_t utf32_to_utf8(const char32_t* src, size_t src_len, 865 char* dst, size_t dst_len) 866{ 867 if (src 
== NULL || src_len == 0 || dst == NULL || dst_len == 0) { 868 return 0; 869 } 870 const char32_t *cur_utf32 = src; 871 const char32_t *end_utf32 = src + src_len; 872 char *cur = dst; 873 const char *end = dst + dst_len; 874 while (cur_utf32 < end_utf32 && cur < end) { 875 size_t len = android::utf32_to_utf8_bytes(*cur_utf32); 876 android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len); 877 cur += len; 878 } 879 if (cur < end) { 880 *cur = '\0'; 881 } 882 return cur - dst; 883} 884 885size_t utf16_to_utf8(const char16_t* src, size_t src_len, 886 char* dst, size_t dst_len) 887{ 888 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 889 return 0; 890 } 891 const char16_t* cur_utf16 = src; 892 const char16_t* const end_utf16 = src + src_len; 893 char *cur = dst; 894 const char* const end = dst + dst_len; 895 while (cur_utf16 < end_utf16 && cur < end) { 896 char32_t utf32; 897 // surrogate pairs 898 if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { 899 utf32 = (*cur_utf16++ - 0xD800) << 10; 900 utf32 |= *cur_utf16++ - 0xDC00; 901 utf32 += 0x10000; 902 } else { 903 utf32 = (char32_t) *cur_utf16++; 904 } 905 size_t len = android::utf32_to_utf8_bytes(utf32); 906 android::utf32_to_utf8((uint8_t*)cur, utf32, len); 907 cur += len; 908 } 909 if (cur < end) { 910 *cur = '\0'; 911 } 912 return cur - dst; 913} 914