String8.cpp revision 6a817e22e4b09a982ba17c1aff57f9fcb735c950
1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <utils/String8.h> 18 19#include <utils/Log.h> 20#include <utils/String16.h> 21#include <utils/TextOutput.h> 22#include <utils/threads.h> 23 24#include <private/utils/Static.h> 25 26#include <ctype.h> 27 28/* 29 * Functions outside android is below the namespace android, since they use 30 * functions and constants in android namespace. 31 */ 32 33// --------------------------------------------------------------------------- 34 35namespace android { 36 37static const char32_t kByteMask = 0x000000BF; 38static const char32_t kByteMark = 0x00000080; 39 40// Surrogates aren't valid for UTF-32 characters, so define some 41// constants that will let us screen them out. 42static const char32_t kUnicodeSurrogateHighStart = 0x0000D800; 43static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; 44static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00; 45static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; 46static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; 47static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; 48static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF; 49 50// Mask used to set appropriate bits in first byte of UTF-8 sequence, 51// indexed by number of bytes in the sequence. 52// 0xxxxxxx 53// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000 54// 110yyyyx 10xxxxxx 55// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0 56// 1110yyyy 10yxxxxx 10xxxxxx 57// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0 58// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx 59// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0 60static const char32_t kFirstByteMark[] = { 61 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 62}; 63 64// Separator used by resource paths. This is not platform dependent contrary 65// to OS_PATH_SEPARATOR. 66#define RES_PATH_SEPARATOR '/' 67 68// Return number of utf8 bytes required for the character. 69static size_t utf32_to_utf8_bytes(char32_t srcChar) 70{ 71 size_t bytesToWrite; 72 73 // Figure out how many bytes the result will require. 74 if (srcChar < 0x00000080) 75 { 76 bytesToWrite = 1; 77 } 78 else if (srcChar < 0x00000800) 79 { 80 bytesToWrite = 2; 81 } 82 else if (srcChar < 0x00010000) 83 { 84 if ((srcChar < kUnicodeSurrogateStart) 85 || (srcChar > kUnicodeSurrogateEnd)) 86 { 87 bytesToWrite = 3; 88 } 89 else 90 { 91 // Surrogates are invalid UTF-32 characters. 92 return 0; 93 } 94 } 95 // Max code point for Unicode is 0x0010FFFF. 96 else if (srcChar <= kUnicodeMaxCodepoint) 97 { 98 bytesToWrite = 4; 99 } 100 else 101 { 102 // Invalid UTF-32 character. 103 return 0; 104 } 105 106 return bytesToWrite; 107} 108 109// Write out the source character to <dstP>. 110 111static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes) 112{ 113 dstP += bytes; 114 switch (bytes) 115 { /* note: everything falls through. */ 116 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 117 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 118 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 119 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); 120 } 121} 122 123// --------------------------------------------------------------------------- 124 125static SharedBuffer* gEmptyStringBuf = NULL; 126static char* gEmptyString = NULL; 127 128extern int gDarwinCantLoadAllObjects; 129int gDarwinIsReallyAnnoying; 130 131static inline char* getEmptyString() 132{ 133 gEmptyStringBuf->acquire(); 134 return gEmptyString; 135} 136 137void initialize_string8() 138{ 139 // HACK: This dummy dependency forces linking libutils Static.cpp, 140 // which is needed to initialize String8/String16 classes. 141 // These variables are named for Darwin, but are needed elsewhere too, 142 // including static linking on any platform. 143 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects; 144 145 SharedBuffer* buf = SharedBuffer::alloc(1); 146 char* str = (char*)buf->data(); 147 *str = 0; 148 gEmptyStringBuf = buf; 149 gEmptyString = str; 150} 151 152void terminate_string8() 153{ 154 SharedBuffer::bufferFromData(gEmptyString)->release(); 155 gEmptyStringBuf = NULL; 156 gEmptyString = NULL; 157} 158 159// --------------------------------------------------------------------------- 160 161static char* allocFromUTF8(const char* in, size_t len) 162{ 163 if (len > 0) { 164 SharedBuffer* buf = SharedBuffer::alloc(len+1); 165 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 166 if (buf) { 167 char* str = (char*)buf->data(); 168 memcpy(str, in, len); 169 str[len] = 0; 170 return str; 171 } 172 return NULL; 173 } 174 175 return getEmptyString(); 176} 177 178template<typename T, typename L> 179static char* allocFromUTF16OrUTF32(const T* in, L len) 180{ 181 if (len == 0) return getEmptyString(); 182 183 size_t bytes = 0; 184 const T* end = in+len; 185 const T* p = in; 186 187 while (p < end) { 188 bytes += utf32_to_utf8_bytes(*p); 189 p++; 190 } 191 192 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 193 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 194 if (buf) { 195 p = in; 196 char* str = (char*)buf->data(); 197 char* d = str; 198 while (p < end) { 199 const T c = *p++; 200 size_t len = utf32_to_utf8_bytes(c); 201 utf32_to_utf8((uint8_t*)d, c, len); 202 d += len; 203 } 204 *d = 0; 205 206 return str; 207 } 208 209 return getEmptyString(); 210} 211 212static char* allocFromUTF16(const char16_t* in, size_t len) 213{ 214 if (len == 0) return getEmptyString(); 215 216 const size_t bytes = utf8_length_from_utf16(in, len); 217 218 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 219 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 220 if (buf) { 221 char* str = (char*)buf->data(); 222 223 utf16_to_utf8(in, len, str, bytes+1); 224 225 return str; 226 } 227 228 return getEmptyString(); 229} 230 231static char* allocFromUTF32(const char32_t* in, size_t len) 232{ 233 return allocFromUTF16OrUTF32<char32_t, size_t>(in, len); 234} 235 236// --------------------------------------------------------------------------- 237 238String8::String8() 239 : mString(getEmptyString()) 240{ 241} 242 243String8::String8(const String8& o) 244 : mString(o.mString) 245{ 246 SharedBuffer::bufferFromData(mString)->acquire(); 247} 248 249String8::String8(const char* o) 250 : mString(allocFromUTF8(o, strlen(o))) 251{ 252 if (mString == NULL) { 253 mString = getEmptyString(); 254 } 255} 256 257String8::String8(const char* o, size_t len) 258 : mString(allocFromUTF8(o, len)) 259{ 260 if (mString == NULL) { 261 mString = getEmptyString(); 262 } 263} 264 265String8::String8(const String16& o) 266 : mString(allocFromUTF16(o.string(), o.size())) 267{ 268} 269 270String8::String8(const char16_t* o) 271 : mString(allocFromUTF16(o, strlen16(o))) 272{ 273} 274 275String8::String8(const char16_t* o, size_t len) 276 : mString(allocFromUTF16(o, len)) 277{ 278} 279 280String8::String8(const char32_t* o) 281 : mString(allocFromUTF32(o, strlen32(o))) 282{ 283} 284 285String8::String8(const char32_t* o, size_t len) 286 : mString(allocFromUTF32(o, len)) 287{ 288} 289 290String8::~String8() 291{ 292 SharedBuffer::bufferFromData(mString)->release(); 293} 294 295void String8::clear() { 296 SharedBuffer::bufferFromData(mString)->release(); 297 mString = getEmptyString(); 298} 299 300void String8::setTo(const String8& other) 301{ 302 SharedBuffer::bufferFromData(other.mString)->acquire(); 303 SharedBuffer::bufferFromData(mString)->release(); 304 mString = other.mString; 305} 306 307status_t String8::setTo(const char* other) 308{ 309 const char *newString = allocFromUTF8(other, strlen(other)); 310 SharedBuffer::bufferFromData(mString)->release(); 311 mString = newString; 312 if (mString) return NO_ERROR; 313 314 mString = getEmptyString(); 315 return NO_MEMORY; 316} 317 318status_t String8::setTo(const char* other, size_t len) 319{ 320 const char *newString = allocFromUTF8(other, len); 321 SharedBuffer::bufferFromData(mString)->release(); 322 mString = newString; 323 if (mString) return NO_ERROR; 324 325 mString = getEmptyString(); 326 return NO_MEMORY; 327} 328 329status_t String8::setTo(const char16_t* other, size_t len) 330{ 331 const char *newString = allocFromUTF16(other, len); 332 SharedBuffer::bufferFromData(mString)->release(); 333 mString = newString; 334 if (mString) return NO_ERROR; 335 336 mString = getEmptyString(); 337 return NO_MEMORY; 338} 339 340status_t String8::setTo(const char32_t* other, size_t len) 341{ 342 const char *newString = allocFromUTF32(other, len); 343 SharedBuffer::bufferFromData(mString)->release(); 344 mString = newString; 345 if (mString) return NO_ERROR; 346 347 mString = getEmptyString(); 348 return NO_MEMORY; 349} 350 351status_t String8::append(const String8& other) 352{ 353 const size_t otherLen = other.bytes(); 354 if (bytes() == 0) { 355 setTo(other); 356 return NO_ERROR; 357 } else if (otherLen == 0) { 358 return NO_ERROR; 359 } 360 361 return real_append(other.string(), otherLen); 362} 363 364status_t String8::append(const char* other) 365{ 366 return append(other, strlen(other)); 367} 368 369status_t String8::append(const char* other, size_t otherLen) 370{ 371 if (bytes() == 0) { 372 return setTo(other, otherLen); 373 } else if (otherLen == 0) { 374 return NO_ERROR; 375 } 376 377 return real_append(other, otherLen); 378} 379 380status_t String8::appendFormat(const char* fmt, ...) 381{ 382 va_list ap; 383 va_start(ap, fmt); 384 385 int result = NO_ERROR; 386 int n = vsnprintf(NULL, 0, fmt, ap); 387 if (n != 0) { 388 size_t oldLength = length(); 389 char* buf = lockBuffer(oldLength + n); 390 if (buf) { 391 vsnprintf(buf + oldLength, n + 1, fmt, ap); 392 } else { 393 result = NO_MEMORY; 394 } 395 } 396 397 va_end(ap); 398 return result; 399} 400 401status_t String8::real_append(const char* other, size_t otherLen) 402{ 403 const size_t myLen = bytes(); 404 405 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 406 ->editResize(myLen+otherLen+1); 407 if (buf) { 408 char* str = (char*)buf->data(); 409 mString = str; 410 str += myLen; 411 memcpy(str, other, otherLen); 412 str[otherLen] = '\0'; 413 return NO_ERROR; 414 } 415 return NO_MEMORY; 416} 417 418char* String8::lockBuffer(size_t size) 419{ 420 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 421 ->editResize(size+1); 422 if (buf) { 423 char* str = (char*)buf->data(); 424 mString = str; 425 return str; 426 } 427 return NULL; 428} 429 430void String8::unlockBuffer() 431{ 432 unlockBuffer(strlen(mString)); 433} 434 435status_t String8::unlockBuffer(size_t size) 436{ 437 if (size != this->size()) { 438 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 439 ->editResize(size+1); 440 if (! buf) { 441 return NO_MEMORY; 442 } 443 444 char* str = (char*)buf->data(); 445 str[size] = 0; 446 mString = str; 447 } 448 449 return NO_ERROR; 450} 451 452ssize_t String8::find(const char* other, size_t start) const 453{ 454 size_t len = size(); 455 if (start >= len) { 456 return -1; 457 } 458 const char* s = mString+start; 459 const char* p = strstr(s, other); 460 return p ? p-mString : -1; 461} 462 463void String8::toLower() 464{ 465 toLower(0, size()); 466} 467 468void String8::toLower(size_t start, size_t length) 469{ 470 const size_t len = size(); 471 if (start >= len) { 472 return; 473 } 474 if (start+length > len) { 475 length = len-start; 476 } 477 char* buf = lockBuffer(len); 478 buf += start; 479 while (length > 0) { 480 *buf = tolower(*buf); 481 buf++; 482 length--; 483 } 484 unlockBuffer(len); 485} 486 487void String8::toUpper() 488{ 489 toUpper(0, size()); 490} 491 492void String8::toUpper(size_t start, size_t length) 493{ 494 const size_t len = size(); 495 if (start >= len) { 496 return; 497 } 498 if (start+length > len) { 499 length = len-start; 500 } 501 char* buf = lockBuffer(len); 502 buf += start; 503 while (length > 0) { 504 *buf = toupper(*buf); 505 buf++; 506 length--; 507 } 508 unlockBuffer(len); 509} 510 511size_t String8::getUtf32Length() const 512{ 513 return utf32_length(mString, length()); 514} 515 516int32_t String8::getUtf32At(size_t index, size_t *next_index) const 517{ 518 return utf32_at(mString, length(), index, next_index); 519} 520 521size_t String8::getUtf32(char32_t* dst, size_t dst_len) const 522{ 523 return utf8_to_utf32(mString, length(), dst, dst_len); 524} 525 526TextOutput& operator<<(TextOutput& to, const String8& val) 527{ 528 to << val.string(); 529 return to; 530} 531 532// --------------------------------------------------------------------------- 533// Path functions 534 535void String8::setPathName(const char* name) 536{ 537 setPathName(name, strlen(name)); 538} 539 540void String8::setPathName(const char* name, size_t len) 541{ 542 char* buf = lockBuffer(len); 543 544 memcpy(buf, name, len); 545 546 // remove trailing path separator, if present 547 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR) 548 len--; 549 550 buf[len] = '\0'; 551 552 unlockBuffer(len); 553} 554 555String8 String8::getPathLeaf(void) const 556{ 557 const char* cp; 558 const char*const buf = mString; 559 560 cp = strrchr(buf, OS_PATH_SEPARATOR); 561 if (cp == NULL) 562 return String8(*this); 563 else 564 return String8(cp+1); 565} 566 567String8 String8::getPathDir(void) const 568{ 569 const char* cp; 570 const char*const str = mString; 571 572 cp = strrchr(str, OS_PATH_SEPARATOR); 573 if (cp == NULL) 574 return String8(""); 575 else 576 return String8(str, cp - str); 577} 578 579String8 String8::walkPath(String8* outRemains) const 580{ 581 const char* cp; 582 const char*const str = mString; 583 const char* buf = str; 584 585 cp = strchr(buf, OS_PATH_SEPARATOR); 586 if (cp == buf) { 587 // don't include a leading '/'. 588 buf = buf+1; 589 cp = strchr(buf, OS_PATH_SEPARATOR); 590 } 591 592 if (cp == NULL) { 593 String8 res = buf != str ? String8(buf) : *this; 594 if (outRemains) *outRemains = String8(""); 595 return res; 596 } 597 598 String8 res(buf, cp-buf); 599 if (outRemains) *outRemains = String8(cp+1); 600 return res; 601} 602 603/* 604 * Helper function for finding the start of an extension in a pathname. 605 * 606 * Returns a pointer inside mString, or NULL if no extension was found. 607 */ 608char* String8::find_extension(void) const 609{ 610 const char* lastSlash; 611 const char* lastDot; 612 int extLen; 613 const char* const str = mString; 614 615 // only look at the filename 616 lastSlash = strrchr(str, OS_PATH_SEPARATOR); 617 if (lastSlash == NULL) 618 lastSlash = str; 619 else 620 lastSlash++; 621 622 // find the last dot 623 lastDot = strrchr(lastSlash, '.'); 624 if (lastDot == NULL) 625 return NULL; 626 627 // looks good, ship it 628 return const_cast<char*>(lastDot); 629} 630 631String8 String8::getPathExtension(void) const 632{ 633 char* ext; 634 635 ext = find_extension(); 636 if (ext != NULL) 637 return String8(ext); 638 else 639 return String8(""); 640} 641 642String8 String8::getBasePath(void) const 643{ 644 char* ext; 645 const char* const str = mString; 646 647 ext = find_extension(); 648 if (ext == NULL) 649 return String8(*this); 650 else 651 return String8(str, ext - str); 652} 653 654String8& String8::appendPath(const char* name) 655{ 656 // TODO: The test below will fail for Win32 paths. Fix later or ignore. 657 if (name[0] != OS_PATH_SEPARATOR) { 658 if (*name == '\0') { 659 // nothing to do 660 return *this; 661 } 662 663 size_t len = length(); 664 if (len == 0) { 665 // no existing filename, just use the new one 666 setPathName(name); 667 return *this; 668 } 669 670 // make room for oldPath + '/' + newPath 671 int newlen = strlen(name); 672 673 char* buf = lockBuffer(len+1+newlen); 674 675 // insert a '/' if needed 676 if (buf[len-1] != OS_PATH_SEPARATOR) 677 buf[len++] = OS_PATH_SEPARATOR; 678 679 memcpy(buf+len, name, newlen+1); 680 len += newlen; 681 682 unlockBuffer(len); 683 684 return *this; 685 } else { 686 setPathName(name); 687 return *this; 688 } 689} 690 691String8& String8::convertToResPath() 692{ 693#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR 694 size_t len = length(); 695 if (len > 0) { 696 char * buf = lockBuffer(len); 697 for (char * end = buf + len; buf < end; ++buf) { 698 if (*buf == OS_PATH_SEPARATOR) 699 *buf = RES_PATH_SEPARATOR; 700 } 701 unlockBuffer(len); 702 } 703#endif 704 return *this; 705} 706 707}; // namespace android 708 709// --------------------------------------------------------------------------- 710 711size_t strlen32(const char32_t *s) 712{ 713 const char32_t *ss = s; 714 while ( *ss ) 715 ss++; 716 return ss-s; 717} 718 719size_t strnlen32(const char32_t *s, size_t maxlen) 720{ 721 const char32_t *ss = s; 722 while ((maxlen > 0) && *ss) { 723 ss++; 724 maxlen--; 725 } 726 return ss-s; 727} 728 729size_t utf8_length(const char *src) 730{ 731 const char *cur = src; 732 size_t ret = 0; 733 while (*cur != '\0') { 734 const char first_char = *cur++; 735 if ((first_char & 0x80) == 0) { // ASCII 736 ret += 1; 737 continue; 738 } 739 // (UTF-8's character must not be like 10xxxxxx, 740 // but 110xxxxx, 1110xxxx, ... or 1111110x) 741 if ((first_char & 0x40) == 0) { 742 return 0; 743 } 744 745 int32_t mask, to_ignore_mask; 746 size_t num_to_read = 0; 747 char32_t utf32 = 0; 748 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; 749 num_to_read < 5 && (first_char & mask); 750 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 751 if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx 752 return 0; 753 } 754 // 0x3F == 00111111 755 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 756 } 757 // "first_char" must be (110xxxxx - 11110xxx) 758 if (num_to_read == 5) { 759 return 0; 760 } 761 to_ignore_mask |= mask; 762 utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); 763 if (utf32 > android::kUnicodeMaxCodepoint) { 764 return 0; 765 } 766 767 ret += num_to_read; 768 } 769 return ret; 770} 771 772size_t utf32_length(const char *src, size_t src_len) 773{ 774 if (src == NULL || src_len == 0) { 775 return 0; 776 } 777 size_t ret = 0; 778 const char* cur; 779 const char* end; 780 size_t num_to_skip; 781 for (cur = src, end = src + src_len, num_to_skip = 1; 782 cur < end; 783 cur += num_to_skip, ret++) { 784 const char first_char = *cur; 785 num_to_skip = 1; 786 if ((first_char & 0x80) == 0) { // ASCII 787 continue; 788 } 789 int32_t mask; 790 791 for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) { 792 } 793 } 794 return ret; 795} 796 797size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) 798{ 799 if (src == NULL || src_len == 0) { 800 return 0; 801 } 802 size_t ret = 0; 803 const char32_t *end = src + src_len; 804 while (src < end) { 805 ret += android::utf32_to_utf8_bytes(*src++); 806 } 807 return ret; 808} 809 810size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) 811{ 812 if (src == NULL || src_len == 0) { 813 return 0; 814 } 815 size_t ret = 0; 816 const char16_t* const end = src + src_len; 817 while (src < end) { 818 if ((*src & 0xFC00) == 0xD800 && (src + 1) < end 819 && (*++src & 0xFC00) == 0xDC00) { 820 // surrogate pairs are always 4 bytes. 821 ret += 4; 822 src++; 823 } else { 824 ret += android::utf32_to_utf8_bytes((char32_t) *src++); 825 } 826 } 827 return ret; 828} 829 830static int32_t utf32_at_internal(const char* cur, size_t *num_read) 831{ 832 const char first_char = *cur; 833 if ((first_char & 0x80) == 0) { // ASCII 834 *num_read = 1; 835 return *cur; 836 } 837 cur++; 838 char32_t mask, to_ignore_mask; 839 size_t num_to_read = 0; 840 char32_t utf32 = first_char; 841 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80; 842 (first_char & mask); 843 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 844 // 0x3F == 00111111 845 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 846 } 847 to_ignore_mask |= mask; 848 utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1))); 849 850 *num_read = num_to_read; 851 return static_cast<int32_t>(utf32); 852} 853 854int32_t utf32_at(const char *src, size_t src_len, 855 size_t index, size_t *next_index) 856{ 857 if (index >= src_len) { 858 return -1; 859 } 860 size_t dummy_index; 861 if (next_index == NULL) { 862 next_index = &dummy_index; 863 } 864 size_t num_read; 865 int32_t ret = utf32_at_internal(src + index, &num_read); 866 if (ret >= 0) { 867 *next_index = index + num_read; 868 } 869 870 return ret; 871} 872 873size_t utf8_to_utf32(const char* src, size_t src_len, 874 char32_t* dst, size_t dst_len) 875{ 876 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 877 return 0; 878 } 879 880 const char* cur = src; 881 const char* end = src + src_len; 882 char32_t* cur_utf32 = dst; 883 const char32_t* end_utf32 = dst + dst_len; 884 while (cur_utf32 < end_utf32 && cur < end) { 885 size_t num_read; 886 *cur_utf32++ = 887 static_cast<char32_t>(utf32_at_internal(cur, &num_read)); 888 cur += num_read; 889 } 890 if (cur_utf32 < end_utf32) { 891 *cur_utf32 = 0; 892 } 893 return static_cast<size_t>(cur_utf32 - dst); 894} 895 896size_t utf32_to_utf8(const char32_t* src, size_t src_len, 897 char* dst, size_t dst_len) 898{ 899 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 900 return 0; 901 } 902 const char32_t *cur_utf32 = src; 903 const char32_t *end_utf32 = src + src_len; 904 char *cur = dst; 905 const char *end = dst + dst_len; 906 while (cur_utf32 < end_utf32 && cur < end) { 907 size_t len = android::utf32_to_utf8_bytes(*cur_utf32); 908 android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len); 909 cur += len; 910 } 911 if (cur < end) { 912 *cur = '\0'; 913 } 914 return cur - dst; 915} 916 917size_t utf16_to_utf8(const char16_t* src, size_t src_len, 918 char* dst, size_t dst_len) 919{ 920 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 921 return 0; 922 } 923 const char16_t* cur_utf16 = src; 924 const char16_t* const end_utf16 = src + src_len; 925 char *cur = dst; 926 const char* const end = dst + dst_len; 927 while (cur_utf16 < end_utf16 && cur < end) { 928 char32_t utf32; 929 // surrogate pairs 930 if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { 931 utf32 = (*cur_utf16++ - 0xD800) << 10; 932 utf32 |= *cur_utf16++ - 0xDC00; 933 utf32 += 0x10000; 934 } else { 935 utf32 = (char32_t) *cur_utf16++; 936 } 937 size_t len = android::utf32_to_utf8_bytes(utf32); 938 android::utf32_to_utf8((uint8_t*)cur, utf32, len); 939 cur += len; 940 } 941 if (cur < end) { 942 *cur = '\0'; 943 } 944 return cur - dst; 945} 946