1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <utils/String8.h> 18 19#include <utils/Log.h> 20#include <utils/String16.h> 21#include <utils/TextOutput.h> 22#include <utils/threads.h> 23 24#include <private/utils/Static.h> 25 26#include <ctype.h> 27 28/* 29 * Functions outside android is below the namespace android, since they use 30 * functions and constants in android namespace. 31 */ 32 33// --------------------------------------------------------------------------- 34 35namespace android { 36 37static const char32_t kByteMask = 0x000000BF; 38static const char32_t kByteMark = 0x00000080; 39 40// Surrogates aren't valid for UTF-32 characters, so define some 41// constants that will let us screen them out. 42static const char32_t kUnicodeSurrogateHighStart = 0x0000D800; 43static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; 44static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00; 45static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; 46static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; 47static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; 48static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF; 49 50// Mask used to set appropriate bits in first byte of UTF-8 sequence, 51// indexed by number of bytes in the sequence. 52// 0xxxxxxx 53// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000 54// 110yyyyx 10xxxxxx 55// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0 56// 1110yyyy 10yxxxxx 10xxxxxx 57// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0 58// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx 59// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0 60static const char32_t kFirstByteMark[] = { 61 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 62}; 63 64// Separator used by resource paths. This is not platform dependent contrary 65// to OS_PATH_SEPARATOR. 66#define RES_PATH_SEPARATOR '/' 67 68// Return number of utf8 bytes required for the character. 69static size_t utf32_to_utf8_bytes(char32_t srcChar) 70{ 71 size_t bytesToWrite; 72 73 // Figure out how many bytes the result will require. 74 if (srcChar < 0x00000080) 75 { 76 bytesToWrite = 1; 77 } 78 else if (srcChar < 0x00000800) 79 { 80 bytesToWrite = 2; 81 } 82 else if (srcChar < 0x00010000) 83 { 84 if ((srcChar < kUnicodeSurrogateStart) 85 || (srcChar > kUnicodeSurrogateEnd)) 86 { 87 bytesToWrite = 3; 88 } 89 else 90 { 91 // Surrogates are invalid UTF-32 characters. 92 return 0; 93 } 94 } 95 // Max code point for Unicode is 0x0010FFFF. 96 else if (srcChar <= kUnicodeMaxCodepoint) 97 { 98 bytesToWrite = 4; 99 } 100 else 101 { 102 // Invalid UTF-32 character. 103 return 0; 104 } 105 106 return bytesToWrite; 107} 108 109// Write out the source character to <dstP>. 110 111static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes) 112{ 113 dstP += bytes; 114 switch (bytes) 115 { /* note: everything falls through. */ 116 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 117 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 118 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 119 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); 120 } 121} 122 123// --------------------------------------------------------------------------- 124 125static SharedBuffer* gEmptyStringBuf = NULL; 126static char* gEmptyString = NULL; 127 128extern int gDarwinCantLoadAllObjects; 129int gDarwinIsReallyAnnoying; 130 131static inline char* getEmptyString() 132{ 133 gEmptyStringBuf->acquire(); 134 return gEmptyString; 135} 136 137void initialize_string8() 138{ 139 // HACK: This dummy dependency forces linking libutils Static.cpp, 140 // which is needed to initialize String8/String16 classes. 141 // These variables are named for Darwin, but are needed elsewhere too, 142 // including static linking on any platform. 143 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects; 144 145 SharedBuffer* buf = SharedBuffer::alloc(1); 146 char* str = (char*)buf->data(); 147 *str = 0; 148 gEmptyStringBuf = buf; 149 gEmptyString = str; 150} 151 152void terminate_string8() 153{ 154 SharedBuffer::bufferFromData(gEmptyString)->release(); 155 gEmptyStringBuf = NULL; 156 gEmptyString = NULL; 157} 158 159// --------------------------------------------------------------------------- 160 161static char* allocFromUTF8(const char* in, size_t len) 162{ 163 if (len > 0) { 164 SharedBuffer* buf = SharedBuffer::alloc(len+1); 165 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 166 if (buf) { 167 char* str = (char*)buf->data(); 168 memcpy(str, in, len); 169 str[len] = 0; 170 return str; 171 } 172 return NULL; 173 } 174 175 return getEmptyString(); 176} 177 178template<typename T, typename L> 179static char* allocFromUTF16OrUTF32(const T* in, L len) 180{ 181 if (len == 0) return getEmptyString(); 182 183 size_t bytes = 0; 184 const T* end = in+len; 185 const T* p = in; 186 187 while (p < end) { 188 bytes += utf32_to_utf8_bytes(*p); 189 p++; 190 } 191 192 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 193 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 194 if (buf) { 195 p = in; 196 char* str = (char*)buf->data(); 197 char* d = str; 198 while (p < end) { 199 const T c = *p++; 200 size_t len = utf32_to_utf8_bytes(c); 201 utf32_to_utf8((uint8_t*)d, c, len); 202 d += len; 203 } 204 *d = 0; 205 206 return str; 207 } 208 209 return getEmptyString(); 210} 211 212static char* allocFromUTF16(const char16_t* in, size_t len) 213{ 214 if (len == 0) return getEmptyString(); 215 216 const size_t bytes = utf8_length_from_utf16(in, len); 217 218 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 219 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 220 if (buf) { 221 char* str = (char*)buf->data(); 222 223 utf16_to_utf8(in, len, str, bytes+1); 224 225 return str; 226 } 227 228 return getEmptyString(); 229} 230 231static char* allocFromUTF32(const char32_t* in, size_t len) 232{ 233 return allocFromUTF16OrUTF32<char32_t, size_t>(in, len); 234} 235 236// --------------------------------------------------------------------------- 237 238String8::String8() 239 : mString(getEmptyString()) 240{ 241} 242 243String8::String8(const String8& o) 244 : mString(o.mString) 245{ 246 SharedBuffer::bufferFromData(mString)->acquire(); 247} 248 249String8::String8(const char* o) 250 : mString(allocFromUTF8(o, strlen(o))) 251{ 252 if (mString == NULL) { 253 mString = getEmptyString(); 254 } 255} 256 257String8::String8(const char* o, size_t len) 258 : mString(allocFromUTF8(o, len)) 259{ 260 if (mString == NULL) { 261 mString = getEmptyString(); 262 } 263} 264 265String8::String8(const String16& o) 266 : mString(allocFromUTF16(o.string(), o.size())) 267{ 268} 269 270String8::String8(const char16_t* o) 271 : mString(allocFromUTF16(o, strlen16(o))) 272{ 273} 274 275String8::String8(const char16_t* o, size_t len) 276 : mString(allocFromUTF16(o, len)) 277{ 278} 279 280String8::String8(const char32_t* o) 281 : mString(allocFromUTF32(o, strlen32(o))) 282{ 283} 284 285String8::String8(const char32_t* o, size_t len) 286 : mString(allocFromUTF32(o, len)) 287{ 288} 289 290String8::~String8() 291{ 292 SharedBuffer::bufferFromData(mString)->release(); 293} 294 295void String8::setTo(const String8& other) 296{ 297 SharedBuffer::bufferFromData(other.mString)->acquire(); 298 SharedBuffer::bufferFromData(mString)->release(); 299 mString = other.mString; 300} 301 302status_t String8::setTo(const char* other) 303{ 304 const char *newString = allocFromUTF8(other, strlen(other)); 305 SharedBuffer::bufferFromData(mString)->release(); 306 mString = newString; 307 if (mString) return NO_ERROR; 308 309 mString = getEmptyString(); 310 return NO_MEMORY; 311} 312 313status_t String8::setTo(const char* other, size_t len) 314{ 315 const char *newString = allocFromUTF8(other, len); 316 SharedBuffer::bufferFromData(mString)->release(); 317 mString = newString; 318 if (mString) return NO_ERROR; 319 320 mString = getEmptyString(); 321 return NO_MEMORY; 322} 323 324status_t String8::setTo(const char16_t* other, size_t len) 325{ 326 const char *newString = allocFromUTF16(other, len); 327 SharedBuffer::bufferFromData(mString)->release(); 328 mString = newString; 329 if (mString) return NO_ERROR; 330 331 mString = getEmptyString(); 332 return NO_MEMORY; 333} 334 335status_t String8::setTo(const char32_t* other, size_t len) 336{ 337 const char *newString = allocFromUTF32(other, len); 338 SharedBuffer::bufferFromData(mString)->release(); 339 mString = newString; 340 if (mString) return NO_ERROR; 341 342 mString = getEmptyString(); 343 return NO_MEMORY; 344} 345 346status_t String8::append(const String8& other) 347{ 348 const size_t otherLen = other.bytes(); 349 if (bytes() == 0) { 350 setTo(other); 351 return NO_ERROR; 352 } else if (otherLen == 0) { 353 return NO_ERROR; 354 } 355 356 return real_append(other.string(), otherLen); 357} 358 359status_t String8::append(const char* other) 360{ 361 return append(other, strlen(other)); 362} 363 364status_t String8::append(const char* other, size_t otherLen) 365{ 366 if (bytes() == 0) { 367 return setTo(other, otherLen); 368 } else if (otherLen == 0) { 369 return NO_ERROR; 370 } 371 372 return real_append(other, otherLen); 373} 374 375status_t String8::appendFormat(const char* fmt, ...) 376{ 377 va_list ap; 378 va_start(ap, fmt); 379 380 int result = NO_ERROR; 381 int n = vsnprintf(NULL, 0, fmt, ap); 382 if (n != 0) { 383 size_t oldLength = length(); 384 char* buf = lockBuffer(oldLength + n); 385 if (buf) { 386 vsnprintf(buf + oldLength, n + 1, fmt, ap); 387 } else { 388 result = NO_MEMORY; 389 } 390 } 391 392 va_end(ap); 393 return result; 394} 395 396status_t String8::real_append(const char* other, size_t otherLen) 397{ 398 const size_t myLen = bytes(); 399 400 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 401 ->editResize(myLen+otherLen+1); 402 if (buf) { 403 char* str = (char*)buf->data(); 404 mString = str; 405 str += myLen; 406 memcpy(str, other, otherLen); 407 str[otherLen] = '\0'; 408 return NO_ERROR; 409 } 410 return NO_MEMORY; 411} 412 413char* String8::lockBuffer(size_t size) 414{ 415 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 416 ->editResize(size+1); 417 if (buf) { 418 char* str = (char*)buf->data(); 419 mString = str; 420 return str; 421 } 422 return NULL; 423} 424 425void String8::unlockBuffer() 426{ 427 unlockBuffer(strlen(mString)); 428} 429 430status_t String8::unlockBuffer(size_t size) 431{ 432 if (size != this->size()) { 433 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 434 ->editResize(size+1); 435 if (! buf) { 436 return NO_MEMORY; 437 } 438 439 char* str = (char*)buf->data(); 440 str[size] = 0; 441 mString = str; 442 } 443 444 return NO_ERROR; 445} 446 447ssize_t String8::find(const char* other, size_t start) const 448{ 449 size_t len = size(); 450 if (start >= len) { 451 return -1; 452 } 453 const char* s = mString+start; 454 const char* p = strstr(s, other); 455 return p ? p-mString : -1; 456} 457 458void String8::toLower() 459{ 460 toLower(0, size()); 461} 462 463void String8::toLower(size_t start, size_t length) 464{ 465 const size_t len = size(); 466 if (start >= len) { 467 return; 468 } 469 if (start+length > len) { 470 length = len-start; 471 } 472 char* buf = lockBuffer(len); 473 buf += start; 474 while (length > 0) { 475 *buf = tolower(*buf); 476 buf++; 477 length--; 478 } 479 unlockBuffer(len); 480} 481 482void String8::toUpper() 483{ 484 toUpper(0, size()); 485} 486 487void String8::toUpper(size_t start, size_t length) 488{ 489 const size_t len = size(); 490 if (start >= len) { 491 return; 492 } 493 if (start+length > len) { 494 length = len-start; 495 } 496 char* buf = lockBuffer(len); 497 buf += start; 498 while (length > 0) { 499 *buf = toupper(*buf); 500 buf++; 501 length--; 502 } 503 unlockBuffer(len); 504} 505 506size_t String8::getUtf32Length() const 507{ 508 return utf32_length(mString, length()); 509} 510 511int32_t String8::getUtf32At(size_t index, size_t *next_index) const 512{ 513 return utf32_at(mString, length(), index, next_index); 514} 515 516size_t String8::getUtf32(char32_t* dst, size_t dst_len) const 517{ 518 return utf8_to_utf32(mString, length(), dst, dst_len); 519} 520 521TextOutput& operator<<(TextOutput& to, const String8& val) 522{ 523 to << val.string(); 524 return to; 525} 526 527// --------------------------------------------------------------------------- 528// Path functions 529 530void String8::setPathName(const char* name) 531{ 532 setPathName(name, strlen(name)); 533} 534 535void String8::setPathName(const char* name, size_t len) 536{ 537 char* buf = lockBuffer(len); 538 539 memcpy(buf, name, len); 540 541 // remove trailing path separator, if present 542 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR) 543 len--; 544 545 buf[len] = '\0'; 546 547 unlockBuffer(len); 548} 549 550String8 String8::getPathLeaf(void) const 551{ 552 const char* cp; 553 const char*const buf = mString; 554 555 cp = strrchr(buf, OS_PATH_SEPARATOR); 556 if (cp == NULL) 557 return String8(*this); 558 else 559 return String8(cp+1); 560} 561 562String8 String8::getPathDir(void) const 563{ 564 const char* cp; 565 const char*const str = mString; 566 567 cp = strrchr(str, OS_PATH_SEPARATOR); 568 if (cp == NULL) 569 return String8(""); 570 else 571 return String8(str, cp - str); 572} 573 574String8 String8::walkPath(String8* outRemains) const 575{ 576 const char* cp; 577 const char*const str = mString; 578 const char* buf = str; 579 580 cp = strchr(buf, OS_PATH_SEPARATOR); 581 if (cp == buf) { 582 // don't include a leading '/'. 583 buf = buf+1; 584 cp = strchr(buf, OS_PATH_SEPARATOR); 585 } 586 587 if (cp == NULL) { 588 String8 res = buf != str ? String8(buf) : *this; 589 if (outRemains) *outRemains = String8(""); 590 return res; 591 } 592 593 String8 res(buf, cp-buf); 594 if (outRemains) *outRemains = String8(cp+1); 595 return res; 596} 597 598/* 599 * Helper function for finding the start of an extension in a pathname. 600 * 601 * Returns a pointer inside mString, or NULL if no extension was found. 602 */ 603char* String8::find_extension(void) const 604{ 605 const char* lastSlash; 606 const char* lastDot; 607 int extLen; 608 const char* const str = mString; 609 610 // only look at the filename 611 lastSlash = strrchr(str, OS_PATH_SEPARATOR); 612 if (lastSlash == NULL) 613 lastSlash = str; 614 else 615 lastSlash++; 616 617 // find the last dot 618 lastDot = strrchr(lastSlash, '.'); 619 if (lastDot == NULL) 620 return NULL; 621 622 // looks good, ship it 623 return const_cast<char*>(lastDot); 624} 625 626String8 String8::getPathExtension(void) const 627{ 628 char* ext; 629 630 ext = find_extension(); 631 if (ext != NULL) 632 return String8(ext); 633 else 634 return String8(""); 635} 636 637String8 String8::getBasePath(void) const 638{ 639 char* ext; 640 const char* const str = mString; 641 642 ext = find_extension(); 643 if (ext == NULL) 644 return String8(*this); 645 else 646 return String8(str, ext - str); 647} 648 649String8& String8::appendPath(const char* name) 650{ 651 // TODO: The test below will fail for Win32 paths. Fix later or ignore. 652 if (name[0] != OS_PATH_SEPARATOR) { 653 if (*name == '\0') { 654 // nothing to do 655 return *this; 656 } 657 658 size_t len = length(); 659 if (len == 0) { 660 // no existing filename, just use the new one 661 setPathName(name); 662 return *this; 663 } 664 665 // make room for oldPath + '/' + newPath 666 int newlen = strlen(name); 667 668 char* buf = lockBuffer(len+1+newlen); 669 670 // insert a '/' if needed 671 if (buf[len-1] != OS_PATH_SEPARATOR) 672 buf[len++] = OS_PATH_SEPARATOR; 673 674 memcpy(buf+len, name, newlen+1); 675 len += newlen; 676 677 unlockBuffer(len); 678 679 return *this; 680 } else { 681 setPathName(name); 682 return *this; 683 } 684} 685 686String8& String8::convertToResPath() 687{ 688#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR 689 size_t len = length(); 690 if (len > 0) { 691 char * buf = lockBuffer(len); 692 for (char * end = buf + len; buf < end; ++buf) { 693 if (*buf == OS_PATH_SEPARATOR) 694 *buf = RES_PATH_SEPARATOR; 695 } 696 unlockBuffer(len); 697 } 698#endif 699 return *this; 700} 701 702}; // namespace android 703 704// --------------------------------------------------------------------------- 705 706size_t strlen32(const char32_t *s) 707{ 708 const char32_t *ss = s; 709 while ( *ss ) 710 ss++; 711 return ss-s; 712} 713 714size_t strnlen32(const char32_t *s, size_t maxlen) 715{ 716 const char32_t *ss = s; 717 while ((maxlen > 0) && *ss) { 718 ss++; 719 maxlen--; 720 } 721 return ss-s; 722} 723 724size_t utf8_length(const char *src) 725{ 726 const char *cur = src; 727 size_t ret = 0; 728 while (*cur != '\0') { 729 const char first_char = *cur++; 730 if ((first_char & 0x80) == 0) { // ASCII 731 ret += 1; 732 continue; 733 } 734 // (UTF-8's character must not be like 10xxxxxx, 735 // but 110xxxxx, 1110xxxx, ... or 1111110x) 736 if ((first_char & 0x40) == 0) { 737 return 0; 738 } 739 740 int32_t mask, to_ignore_mask; 741 size_t num_to_read = 0; 742 char32_t utf32 = 0; 743 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; 744 num_to_read < 5 && (first_char & mask); 745 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 746 if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx 747 return 0; 748 } 749 // 0x3F == 00111111 750 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 751 } 752 // "first_char" must be (110xxxxx - 11110xxx) 753 if (num_to_read == 5) { 754 return 0; 755 } 756 to_ignore_mask |= mask; 757 utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); 758 if (utf32 > android::kUnicodeMaxCodepoint) { 759 return 0; 760 } 761 762 ret += num_to_read; 763 } 764 return ret; 765} 766 767size_t utf32_length(const char *src, size_t src_len) 768{ 769 if (src == NULL || src_len == 0) { 770 return 0; 771 } 772 size_t ret = 0; 773 const char* cur; 774 const char* end; 775 size_t num_to_skip; 776 for (cur = src, end = src + src_len, num_to_skip = 1; 777 cur < end; 778 cur += num_to_skip, ret++) { 779 const char first_char = *cur; 780 num_to_skip = 1; 781 if ((first_char & 0x80) == 0) { // ASCII 782 continue; 783 } 784 int32_t mask; 785 786 for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) { 787 } 788 } 789 return ret; 790} 791 792size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) 793{ 794 if (src == NULL || src_len == 0) { 795 return 0; 796 } 797 size_t ret = 0; 798 const char32_t *end = src + src_len; 799 while (src < end) { 800 ret += android::utf32_to_utf8_bytes(*src++); 801 } 802 return ret; 803} 804 805size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) 806{ 807 if (src == NULL || src_len == 0) { 808 return 0; 809 } 810 size_t ret = 0; 811 const char16_t* const end = src + src_len; 812 while (src < end) { 813 if ((*src & 0xFC00) == 0xD800 && (src + 1) < end 814 && (*++src & 0xFC00) == 0xDC00) { 815 // surrogate pairs are always 4 bytes. 816 ret += 4; 817 src++; 818 } else { 819 ret += android::utf32_to_utf8_bytes((char32_t) *src++); 820 } 821 } 822 return ret; 823} 824 825static int32_t utf32_at_internal(const char* cur, size_t *num_read) 826{ 827 const char first_char = *cur; 828 if ((first_char & 0x80) == 0) { // ASCII 829 *num_read = 1; 830 return *cur; 831 } 832 cur++; 833 char32_t mask, to_ignore_mask; 834 size_t num_to_read = 0; 835 char32_t utf32 = first_char; 836 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80; 837 (first_char & mask); 838 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 839 // 0x3F == 00111111 840 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 841 } 842 to_ignore_mask |= mask; 843 utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1))); 844 845 *num_read = num_to_read; 846 return static_cast<int32_t>(utf32); 847} 848 849int32_t utf32_at(const char *src, size_t src_len, 850 size_t index, size_t *next_index) 851{ 852 if (index >= src_len) { 853 return -1; 854 } 855 size_t dummy_index; 856 if (next_index == NULL) { 857 next_index = &dummy_index; 858 } 859 size_t num_read; 860 int32_t ret = utf32_at_internal(src + index, &num_read); 861 if (ret >= 0) { 862 *next_index = index + num_read; 863 } 864 865 return ret; 866} 867 868size_t utf8_to_utf32(const char* src, size_t src_len, 869 char32_t* dst, size_t dst_len) 870{ 871 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 872 return 0; 873 } 874 875 const char* cur = src; 876 const char* end = src + src_len; 877 char32_t* cur_utf32 = dst; 878 const char32_t* end_utf32 = dst + dst_len; 879 while (cur_utf32 < end_utf32 && cur < end) { 880 size_t num_read; 881 *cur_utf32++ = 882 static_cast<char32_t>(utf32_at_internal(cur, &num_read)); 883 cur += num_read; 884 } 885 if (cur_utf32 < end_utf32) { 886 *cur_utf32 = 0; 887 } 888 return static_cast<size_t>(cur_utf32 - dst); 889} 890 891size_t utf32_to_utf8(const char32_t* src, size_t src_len, 892 char* dst, size_t dst_len) 893{ 894 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 895 return 0; 896 } 897 const char32_t *cur_utf32 = src; 898 const char32_t *end_utf32 = src + src_len; 899 char *cur = dst; 900 const char *end = dst + dst_len; 901 while (cur_utf32 < end_utf32 && cur < end) { 902 size_t len = android::utf32_to_utf8_bytes(*cur_utf32); 903 android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len); 904 cur += len; 905 } 906 if (cur < end) { 907 *cur = '\0'; 908 } 909 return cur - dst; 910} 911 912size_t utf16_to_utf8(const char16_t* src, size_t src_len, 913 char* dst, size_t dst_len) 914{ 915 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 916 return 0; 917 } 918 const char16_t* cur_utf16 = src; 919 const char16_t* const end_utf16 = src + src_len; 920 char *cur = dst; 921 const char* const end = dst + dst_len; 922 while (cur_utf16 < end_utf16 && cur < end) { 923 char32_t utf32; 924 // surrogate pairs 925 if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { 926 utf32 = (*cur_utf16++ - 0xD800) << 10; 927 utf32 |= *cur_utf16++ - 0xDC00; 928 utf32 += 0x10000; 929 } else { 930 utf32 = (char32_t) *cur_utf16++; 931 } 932 size_t len = android::utf32_to_utf8_bytes(utf32); 933 android::utf32_to_utf8((uint8_t*)cur, utf32, len); 934 cur += len; 935 } 936 if (cur < end) { 937 *cur = '\0'; 938 } 939 return cur - dst; 940} 941