1/* 2****************************************************************************** 3* Copyright (C) 1999-2014, International Business Machines Corporation and 4* others. All Rights Reserved. 5****************************************************************************** 6* 7* File unistr.cpp 8* 9* Modification History: 10* 11* Date Name Description 12* 09/25/98 stephen Creation. 13* 04/20/99 stephen Overhauled per 4/16 code review. 14* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX 15* 11/18/99 aliu Added handleReplaceBetween() to make inherit from 16* Replaceable. 17* 06/25/01 grhoten Removed the dependency on iostream 18****************************************************************************** 19*/ 20 21#include "unicode/utypes.h" 22#include "unicode/appendable.h" 23#include "unicode/putil.h" 24#include "cstring.h" 25#include "cmemory.h" 26#include "unicode/ustring.h" 27#include "unicode/unistr.h" 28#include "unicode/utf.h" 29#include "unicode/utf16.h" 30#include "uelement.h" 31#include "ustr_imp.h" 32#include "umutex.h" 33#include "uassert.h" 34 35#if 0 36 37#include <iostream> 38using namespace std; 39 40//DEBUGGING 41void 42print(const UnicodeString& s, 43 const char *name) 44{ 45 UChar c; 46 cout << name << ":|"; 47 for(int i = 0; i < s.length(); ++i) { 48 c = s[i]; 49 if(c>= 0x007E || c < 0x0020) 50 cout << "[0x" << hex << s[i] << "]"; 51 else 52 cout << (char) s[i]; 53 } 54 cout << '|' << endl; 55} 56 57void 58print(const UChar *s, 59 int32_t len, 60 const char *name) 61{ 62 UChar c; 63 cout << name << ":|"; 64 for(int i = 0; i < len; ++i) { 65 c = s[i]; 66 if(c>= 0x007E || c < 0x0020) 67 cout << "[0x" << hex << s[i] << "]"; 68 else 69 cout << (char) s[i]; 70 } 71 cout << '|' << endl; 72} 73// END DEBUGGING 74#endif 75 76// Local function definitions for now 77 78// need to copy areas that may overlap 79static 80inline void 81us_arrayCopy(const UChar *src, int32_t srcStart, 82 UChar *dst, int32_t dstStart, int32_t count) 83{ 84 if(count>0) { 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 86 } 87} 88 89// u_unescapeAt() callback to get a UChar from a UnicodeString 90U_CDECL_BEGIN 91static UChar U_CALLCONV 92UnicodeString_charAt(int32_t offset, void *context) { 93 return ((icu::UnicodeString*) context)->charAt(offset); 94} 95U_CDECL_END 96 97U_NAMESPACE_BEGIN 98 99/* The Replaceable virtual destructor can't be defined in the header 100 due to how AIX works with multiple definitions of virtual functions. 101*/ 102Replaceable::~Replaceable() {} 103 104UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 105 106UnicodeString U_EXPORT2 107operator+ (const UnicodeString &s1, const UnicodeString &s2) { 108 return 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 110 append(s1). 111 append(s2); 112} 113 114//======================================== 115// Reference Counting functions, put at top of file so that optimizing compilers 116// have a chance to automatically inline. 117//======================================== 118 119void 120UnicodeString::addRef() { 121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 122} 123 124int32_t 125UnicodeString::removeRef() { 126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 127} 128 129int32_t 130UnicodeString::refCount() const { 131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); 132} 133 134void 135UnicodeString::releaseArray() { 136 if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { 137 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 138 } 139} 140 141 142 143//======================================== 144// Constructors 145//======================================== 146 147// The default constructor is inline in unistr.h. 148 149UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { 150 fUnion.fFields.fLengthAndFlags = 0; 151 if(count <= 0 || (uint32_t)c > 0x10ffff) { 152 // just allocate and do not do anything else 153 allocate(capacity); 154 } else { 155 // count > 0, allocate and fill the new string with count c's 156 int32_t unitCount = U16_LENGTH(c), length = count * unitCount; 157 if(capacity < length) { 158 capacity = length; 159 } 160 if(allocate(capacity)) { 161 UChar *array = getArrayStart(); 162 int32_t i = 0; 163 164 // fill the new string with c 165 if(unitCount == 1) { 166 // fill with length UChars 167 while(i < length) { 168 array[i++] = (UChar)c; 169 } 170 } else { 171 // get the code units for c 172 UChar units[U16_MAX_LENGTH]; 173 U16_APPEND_UNSAFE(units, i, c); 174 175 // now it must be i==unitCount 176 i = 0; 177 178 // for Unicode, unitCount can only be 1, 2, 3, or 4 179 // 1 is handled above 180 while(i < length) { 181 int32_t unitIdx = 0; 182 while(unitIdx < unitCount) { 183 array[i++]=units[unitIdx++]; 184 } 185 } 186 } 187 } 188 setLength(length); 189 } 190} 191 192UnicodeString::UnicodeString(UChar ch) { 193 fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; 194 fUnion.fStackFields.fBuffer[0] = ch; 195} 196 197UnicodeString::UnicodeString(UChar32 ch) { 198 fUnion.fFields.fLengthAndFlags = kShortString; 199 int32_t i = 0; 200 UBool isError = FALSE; 201 U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError); 202 // We test isError so that the compiler does not complain that we don't. 203 // If isError then i==0 which is what we want anyway. 204 if(!isError) { 205 setShortLength(i); 206 } 207} 208 209UnicodeString::UnicodeString(const UChar *text) { 210 fUnion.fFields.fLengthAndFlags = kShortString; 211 doReplace(0, 0, text, 0, -1); 212} 213 214UnicodeString::UnicodeString(const UChar *text, 215 int32_t textLength) { 216 fUnion.fFields.fLengthAndFlags = kShortString; 217 doReplace(0, 0, text, 0, textLength); 218} 219 220UnicodeString::UnicodeString(UBool isTerminated, 221 const UChar *text, 222 int32_t textLength) { 223 fUnion.fFields.fLengthAndFlags = kReadonlyAlias; 224 if(text == NULL) { 225 // treat as an empty string, do not alias 226 setToEmpty(); 227 } else if(textLength < -1 || 228 (textLength == -1 && !isTerminated) || 229 (textLength >= 0 && isTerminated && text[textLength] != 0) 230 ) { 231 setToBogus(); 232 } else { 233 if(textLength == -1) { 234 // text is terminated, or else it would have failed the above test 235 textLength = u_strlen(text); 236 } 237 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 238 } 239} 240 241UnicodeString::UnicodeString(UChar *buff, 242 int32_t buffLength, 243 int32_t buffCapacity) { 244 fUnion.fFields.fLengthAndFlags = kWritableAlias; 245 if(buff == NULL) { 246 // treat as an empty string, do not alias 247 setToEmpty(); 248 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 249 setToBogus(); 250 } else { 251 if(buffLength == -1) { 252 // fLength = u_strlen(buff); but do not look beyond buffCapacity 253 const UChar *p = buff, *limit = buff + buffCapacity; 254 while(p != limit && *p != 0) { 255 ++p; 256 } 257 buffLength = (int32_t)(p - buff); 258 } 259 setArray(buff, buffLength, buffCapacity); 260 } 261} 262 263UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { 264 fUnion.fFields.fLengthAndFlags = kShortString; 265 if(src==NULL) { 266 // treat as an empty string 267 } else { 268 if(length<0) { 269 length=(int32_t)uprv_strlen(src); 270 } 271 if(cloneArrayIfNeeded(length, length, FALSE)) { 272 u_charsToUChars(src, getArrayStart(), length); 273 setLength(length); 274 } else { 275 setToBogus(); 276 } 277 } 278} 279 280#if U_CHARSET_IS_UTF8 281 282UnicodeString::UnicodeString(const char *codepageData) { 283 fUnion.fFields.fLengthAndFlags = kShortString; 284 if(codepageData != 0) { 285 setToUTF8(codepageData); 286 } 287} 288 289UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) { 290 fUnion.fFields.fLengthAndFlags = kShortString; 291 // if there's nothing to convert, do nothing 292 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 293 return; 294 } 295 if(dataLength == -1) { 296 dataLength = (int32_t)uprv_strlen(codepageData); 297 } 298 setToUTF8(StringPiece(codepageData, dataLength)); 299} 300 301// else see unistr_cnv.cpp 302#endif 303 304UnicodeString::UnicodeString(const UnicodeString& that) { 305 fUnion.fFields.fLengthAndFlags = kShortString; 306 copyFrom(that); 307} 308 309UnicodeString::UnicodeString(const UnicodeString& that, 310 int32_t srcStart) { 311 fUnion.fFields.fLengthAndFlags = kShortString; 312 setTo(that, srcStart); 313} 314 315UnicodeString::UnicodeString(const UnicodeString& that, 316 int32_t srcStart, 317 int32_t srcLength) { 318 fUnion.fFields.fLengthAndFlags = kShortString; 319 setTo(that, srcStart, srcLength); 320} 321 322// Replaceable base class clone() default implementation, does not clone 323Replaceable * 324Replaceable::clone() const { 325 return NULL; 326} 327 328// UnicodeString overrides clone() with a real implementation 329Replaceable * 330UnicodeString::clone() const { 331 return new UnicodeString(*this); 332} 333 334//======================================== 335// array allocation 336//======================================== 337 338UBool 339UnicodeString::allocate(int32_t capacity) { 340 if(capacity <= US_STACKBUF_SIZE) { 341 fUnion.fFields.fLengthAndFlags = kShortString; 342 } else { 343 // count bytes for the refCounter and the string capacity, and 344 // round up to a multiple of 16; then divide by 4 and allocate int32_t's 345 // to be safely aligned for the refCount 346 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() 347 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); 348 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); 349 if(array != 0) { 350 // set initial refCount and point behind the refCount 351 *array++ = 1; 352 353 // have fArray point to the first UChar 354 fUnion.fFields.fArray = (UChar *)array; 355 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); 356 fUnion.fFields.fLengthAndFlags = kLongString; 357 } else { 358 fUnion.fFields.fLengthAndFlags = kIsBogus; 359 fUnion.fFields.fArray = 0; 360 fUnion.fFields.fCapacity = 0; 361 return FALSE; 362 } 363 } 364 return TRUE; 365} 366 367//======================================== 368// Destructor 369//======================================== 370UnicodeString::~UnicodeString() 371{ 372 releaseArray(); 373} 374 375//======================================== 376// Factory methods 377//======================================== 378 379UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { 380 UnicodeString result; 381 result.setToUTF8(utf8); 382 return result; 383} 384 385UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { 386 UnicodeString result; 387 int32_t capacity; 388 // Most UTF-32 strings will be BMP-only and result in a same-length 389 // UTF-16 string. We overestimate the capacity just slightly, 390 // just in case there are a few supplementary characters. 391 if(length <= US_STACKBUF_SIZE) { 392 capacity = US_STACKBUF_SIZE; 393 } else { 394 capacity = length + (length >> 4) + 4; 395 } 396 do { 397 UChar *utf16 = result.getBuffer(capacity); 398 int32_t length16; 399 UErrorCode errorCode = U_ZERO_ERROR; 400 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 401 utf32, length, 402 0xfffd, // Substitution character. 403 NULL, // Don't care about number of substitutions. 404 &errorCode); 405 result.releaseBuffer(length16); 406 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 407 capacity = length16 + 1; // +1 for the terminating NUL. 408 continue; 409 } else if(U_FAILURE(errorCode)) { 410 result.setToBogus(); 411 } 412 break; 413 } while(TRUE); 414 return result; 415} 416 417//======================================== 418// Assignment 419//======================================== 420 421UnicodeString & 422UnicodeString::operator=(const UnicodeString &src) { 423 return copyFrom(src); 424} 425 426UnicodeString & 427UnicodeString::fastCopyFrom(const UnicodeString &src) { 428 return copyFrom(src, TRUE); 429} 430 431UnicodeString & 432UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 433 // if assigning to ourselves, do nothing 434 if(this == &src) { 435 return *this; 436 } 437 438 // is the right side bogus? 439 if(src.isBogus()) { 440 setToBogus(); 441 return *this; 442 } 443 444 // delete the current contents 445 releaseArray(); 446 447 if(src.isEmpty()) { 448 // empty string - use the stack buffer 449 setToEmpty(); 450 return *this; 451 } 452 453 // fLength>0 and not an "open" src.getBuffer(minCapacity) 454 fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; 455 switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) { 456 case kShortString: 457 // short string using the stack buffer, do the same 458 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, 459 getShortLength() * U_SIZEOF_UCHAR); 460 break; 461 case kLongString: 462 // src uses a refCounted string buffer, use that buffer with refCount 463 // src is const, use a cast - we don't actually change it 464 ((UnicodeString &)src).addRef(); 465 // copy all fields, share the reference-counted buffer 466 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 467 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 468 if(!hasShortLength()) { 469 fUnion.fFields.fLength = src.fUnion.fFields.fLength; 470 } 471 break; 472 case kReadonlyAlias: 473 if(fastCopy) { 474 // src is a readonly alias, do the same 475 // -> maintain the readonly alias as such 476 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 477 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 478 if(!hasShortLength()) { 479 fUnion.fFields.fLength = src.fUnion.fFields.fLength; 480 } 481 break; 482 } 483 // else if(!fastCopy) fall through to case kWritableAlias 484 // -> allocate a new buffer and copy the contents 485 case kWritableAlias: { 486 // src is a writable alias; we make a copy of that instead 487 int32_t srcLength = src.length(); 488 if(allocate(srcLength)) { 489 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); 490 setLength(srcLength); 491 break; 492 } 493 // if there is not enough memory, then fall through to setting to bogus 494 } 495 default: 496 // if src is bogus, set ourselves to bogus 497 // do not call setToBogus() here because fArray and flags are not consistent here 498 fUnion.fFields.fLengthAndFlags = kIsBogus; 499 fUnion.fFields.fArray = 0; 500 fUnion.fFields.fCapacity = 0; 501 break; 502 } 503 504 return *this; 505} 506 507//======================================== 508// Miscellaneous operations 509//======================================== 510 511UnicodeString UnicodeString::unescape() const { 512 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 513 const UChar *array = getBuffer(); 514 int32_t len = length(); 515 int32_t prev = 0; 516 for (int32_t i=0;;) { 517 if (i == len) { 518 result.append(array, prev, len - prev); 519 break; 520 } 521 if (array[i++] == 0x5C /*'\\'*/) { 522 result.append(array, prev, (i - 1) - prev); 523 UChar32 c = unescapeAt(i); // advances i 524 if (c < 0) { 525 result.remove(); // return empty string 526 break; // invalid escape sequence 527 } 528 result.append(c); 529 prev = i; 530 } 531 } 532 return result; 533} 534 535UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 536 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 537} 538 539//======================================== 540// Read-only implementation 541//======================================== 542UBool 543UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { 544 // Requires: this & text not bogus and have same lengths. 545 // Byte-wise comparison works for equality regardless of endianness. 546 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; 547} 548 549int8_t 550UnicodeString::doCompare( int32_t start, 551 int32_t length, 552 const UChar *srcChars, 553 int32_t srcStart, 554 int32_t srcLength) const 555{ 556 // compare illegal string values 557 if(isBogus()) { 558 return -1; 559 } 560 561 // pin indices to legal values 562 pinIndices(start, length); 563 564 if(srcChars == NULL) { 565 // treat const UChar *srcChars==NULL as an empty string 566 return length == 0 ? 0 : 1; 567 } 568 569 // get the correct pointer 570 const UChar *chars = getArrayStart(); 571 572 chars += start; 573 srcChars += srcStart; 574 575 int32_t minLength; 576 int8_t lengthResult; 577 578 // get the srcLength if necessary 579 if(srcLength < 0) { 580 srcLength = u_strlen(srcChars + srcStart); 581 } 582 583 // are we comparing different lengths? 584 if(length != srcLength) { 585 if(length < srcLength) { 586 minLength = length; 587 lengthResult = -1; 588 } else { 589 minLength = srcLength; 590 lengthResult = 1; 591 } 592 } else { 593 minLength = length; 594 lengthResult = 0; 595 } 596 597 /* 598 * note that uprv_memcmp() returns an int but we return an int8_t; 599 * we need to take care not to truncate the result - 600 * one way to do this is to right-shift the value to 601 * move the sign bit into the lower 8 bits and making sure that this 602 * does not become 0 itself 603 */ 604 605 if(minLength > 0 && chars != srcChars) { 606 int32_t result; 607 608# if U_IS_BIG_ENDIAN 609 // big-endian: byte comparison works 610 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 611 if(result != 0) { 612 return (int8_t)(result >> 15 | 1); 613 } 614# else 615 // little-endian: compare UChar units 616 do { 617 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 618 if(result != 0) { 619 return (int8_t)(result >> 15 | 1); 620 } 621 } while(--minLength > 0); 622# endif 623 } 624 return lengthResult; 625} 626 627/* String compare in code point order - doCompare() compares in code unit order. */ 628int8_t 629UnicodeString::doCompareCodePointOrder(int32_t start, 630 int32_t length, 631 const UChar *srcChars, 632 int32_t srcStart, 633 int32_t srcLength) const 634{ 635 // compare illegal string values 636 // treat const UChar *srcChars==NULL as an empty string 637 if(isBogus()) { 638 return -1; 639 } 640 641 // pin indices to legal values 642 pinIndices(start, length); 643 644 if(srcChars == NULL) { 645 srcStart = srcLength = 0; 646 } 647 648 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE); 649 /* translate the 32-bit result into an 8-bit one */ 650 if(diff!=0) { 651 return (int8_t)(diff >> 15 | 1); 652 } else { 653 return 0; 654 } 655} 656 657int32_t 658UnicodeString::getLength() const { 659 return length(); 660} 661 662UChar 663UnicodeString::getCharAt(int32_t offset) const { 664 return charAt(offset); 665} 666 667UChar32 668UnicodeString::getChar32At(int32_t offset) const { 669 return char32At(offset); 670} 671 672UChar32 673UnicodeString::char32At(int32_t offset) const 674{ 675 int32_t len = length(); 676 if((uint32_t)offset < (uint32_t)len) { 677 const UChar *array = getArrayStart(); 678 UChar32 c; 679 U16_GET(array, 0, offset, len, c); 680 return c; 681 } else { 682 return kInvalidUChar; 683 } 684} 685 686int32_t 687UnicodeString::getChar32Start(int32_t offset) const { 688 if((uint32_t)offset < (uint32_t)length()) { 689 const UChar *array = getArrayStart(); 690 U16_SET_CP_START(array, 0, offset); 691 return offset; 692 } else { 693 return 0; 694 } 695} 696 697int32_t 698UnicodeString::getChar32Limit(int32_t offset) const { 699 int32_t len = length(); 700 if((uint32_t)offset < (uint32_t)len) { 701 const UChar *array = getArrayStart(); 702 U16_SET_CP_LIMIT(array, 0, offset, len); 703 return offset; 704 } else { 705 return len; 706 } 707} 708 709int32_t 710UnicodeString::countChar32(int32_t start, int32_t length) const { 711 pinIndices(start, length); 712 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 713 return u_countChar32(getArrayStart()+start, length); 714} 715 716UBool 717UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 718 pinIndices(start, length); 719 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 720 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 721} 722 723int32_t 724UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 725 // pin index 726 int32_t len = length(); 727 if(index<0) { 728 index=0; 729 } else if(index>len) { 730 index=len; 731 } 732 733 const UChar *array = getArrayStart(); 734 if(delta>0) { 735 U16_FWD_N(array, index, len, delta); 736 } else { 737 U16_BACK_N(array, 0, index, -delta); 738 } 739 740 return index; 741} 742 743void 744UnicodeString::doExtract(int32_t start, 745 int32_t length, 746 UChar *dst, 747 int32_t dstStart) const 748{ 749 // pin indices to legal values 750 pinIndices(start, length); 751 752 // do not copy anything if we alias dst itself 753 const UChar *array = getArrayStart(); 754 if(array + start != dst + dstStart) { 755 us_arrayCopy(array, start, dst, dstStart, length); 756 } 757} 758 759int32_t 760UnicodeString::extract(UChar *dest, int32_t destCapacity, 761 UErrorCode &errorCode) const { 762 int32_t len = length(); 763 if(U_SUCCESS(errorCode)) { 764 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 765 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 766 } else { 767 const UChar *array = getArrayStart(); 768 if(len>0 && len<=destCapacity && array!=dest) { 769 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 770 } 771 return u_terminateUChars(dest, destCapacity, len, &errorCode); 772 } 773 } 774 775 return len; 776} 777 778int32_t 779UnicodeString::extract(int32_t start, 780 int32_t length, 781 char *target, 782 int32_t targetCapacity, 783 enum EInvariant) const 784{ 785 // if the arguments are illegal, then do nothing 786 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 787 return 0; 788 } 789 790 // pin the indices to legal values 791 pinIndices(start, length); 792 793 if(length <= targetCapacity) { 794 u_UCharsToChars(getArrayStart() + start, target, length); 795 } 796 UErrorCode status = U_ZERO_ERROR; 797 return u_terminateChars(target, targetCapacity, length, &status); 798} 799 800UnicodeString 801UnicodeString::tempSubString(int32_t start, int32_t len) const { 802 pinIndices(start, len); 803 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 804 if(array==NULL) { 805 array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string 806 len=-2; // bogus result string 807 } 808 return UnicodeString(FALSE, array + start, len); 809} 810 811int32_t 812UnicodeString::toUTF8(int32_t start, int32_t len, 813 char *target, int32_t capacity) const { 814 pinIndices(start, len); 815 int32_t length8; 816 UErrorCode errorCode = U_ZERO_ERROR; 817 u_strToUTF8WithSub(target, capacity, &length8, 818 getBuffer() + start, len, 819 0xFFFD, // Standard substitution character. 820 NULL, // Don't care about number of substitutions. 821 &errorCode); 822 return length8; 823} 824 825#if U_CHARSET_IS_UTF8 826 827int32_t 828UnicodeString::extract(int32_t start, int32_t len, 829 char *target, uint32_t dstSize) const { 830 // if the arguments are illegal, then do nothing 831 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 832 return 0; 833 } 834 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 835} 836 837// else see unistr_cnv.cpp 838#endif 839 840void 841UnicodeString::extractBetween(int32_t start, 842 int32_t limit, 843 UnicodeString& target) const { 844 pinIndex(start); 845 pinIndex(limit); 846 doExtract(start, limit - start, target); 847} 848 849// When converting from UTF-16 to UTF-8, the result will have at most 3 times 850// as many bytes as the source has UChars. 851// The "worst cases" are writing systems like Indic, Thai and CJK with 852// 3:1 bytes:UChars. 853void 854UnicodeString::toUTF8(ByteSink &sink) const { 855 int32_t length16 = length(); 856 if(length16 != 0) { 857 char stackBuffer[1024]; 858 int32_t capacity = (int32_t)sizeof(stackBuffer); 859 UBool utf8IsOwned = FALSE; 860 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 861 3*length16, 862 stackBuffer, capacity, 863 &capacity); 864 int32_t length8 = 0; 865 UErrorCode errorCode = U_ZERO_ERROR; 866 u_strToUTF8WithSub(utf8, capacity, &length8, 867 getBuffer(), length16, 868 0xFFFD, // Standard substitution character. 869 NULL, // Don't care about number of substitutions. 870 &errorCode); 871 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 872 utf8 = (char *)uprv_malloc(length8); 873 if(utf8 != NULL) { 874 utf8IsOwned = TRUE; 875 errorCode = U_ZERO_ERROR; 876 u_strToUTF8WithSub(utf8, length8, &length8, 877 getBuffer(), length16, 878 0xFFFD, // Standard substitution character. 879 NULL, // Don't care about number of substitutions. 880 &errorCode); 881 } else { 882 errorCode = U_MEMORY_ALLOCATION_ERROR; 883 } 884 } 885 if(U_SUCCESS(errorCode)) { 886 sink.Append(utf8, length8); 887 sink.Flush(); 888 } 889 if(utf8IsOwned) { 890 uprv_free(utf8); 891 } 892 } 893} 894 895int32_t 896UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 897 int32_t length32=0; 898 if(U_SUCCESS(errorCode)) { 899 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 900 u_strToUTF32WithSub(utf32, capacity, &length32, 901 getBuffer(), length(), 902 0xfffd, // Substitution character. 903 NULL, // Don't care about number of substitutions. 904 &errorCode); 905 } 906 return length32; 907} 908 909int32_t 910UnicodeString::indexOf(const UChar *srcChars, 911 int32_t srcStart, 912 int32_t srcLength, 913 int32_t start, 914 int32_t length) const 915{ 916 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 917 return -1; 918 } 919 920 // UnicodeString does not find empty substrings 921 if(srcLength < 0 && srcChars[srcStart] == 0) { 922 return -1; 923 } 924 925 // get the indices within bounds 926 pinIndices(start, length); 927 928 // find the first occurrence of the substring 929 const UChar *array = getArrayStart(); 930 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 931 if(match == NULL) { 932 return -1; 933 } else { 934 return (int32_t)(match - array); 935 } 936} 937 938int32_t 939UnicodeString::doIndexOf(UChar c, 940 int32_t start, 941 int32_t length) const 942{ 943 // pin indices 944 pinIndices(start, length); 945 946 // find the first occurrence of c 947 const UChar *array = getArrayStart(); 948 const UChar *match = u_memchr(array + start, c, length); 949 if(match == NULL) { 950 return -1; 951 } else { 952 return (int32_t)(match - array); 953 } 954} 955 956int32_t 957UnicodeString::doIndexOf(UChar32 c, 958 int32_t start, 959 int32_t length) const { 960 // pin indices 961 pinIndices(start, length); 962 963 // find the first occurrence of c 964 const UChar *array = getArrayStart(); 965 const UChar *match = u_memchr32(array + start, c, length); 966 if(match == NULL) { 967 return -1; 968 } else { 969 return (int32_t)(match - array); 970 } 971} 972 973int32_t 974UnicodeString::lastIndexOf(const UChar *srcChars, 975 int32_t srcStart, 976 int32_t srcLength, 977 int32_t start, 978 int32_t length) const 979{ 980 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 981 return -1; 982 } 983 984 // UnicodeString does not find empty substrings 985 if(srcLength < 0 && srcChars[srcStart] == 0) { 986 return -1; 987 } 988 989 // get the indices within bounds 990 pinIndices(start, length); 991 992 // find the last occurrence of the substring 993 const UChar *array = getArrayStart(); 994 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 995 if(match == NULL) { 996 return -1; 997 } else { 998 return (int32_t)(match - array); 999 } 1000} 1001 1002int32_t 1003UnicodeString::doLastIndexOf(UChar c, 1004 int32_t start, 1005 int32_t length) const 1006{ 1007 if(isBogus()) { 1008 return -1; 1009 } 1010 1011 // pin indices 1012 pinIndices(start, length); 1013 1014 // find the last occurrence of c 1015 const UChar *array = getArrayStart(); 1016 const UChar *match = u_memrchr(array + start, c, length); 1017 if(match == NULL) { 1018 return -1; 1019 } else { 1020 return (int32_t)(match - array); 1021 } 1022} 1023 1024int32_t 1025UnicodeString::doLastIndexOf(UChar32 c, 1026 int32_t start, 1027 int32_t length) const { 1028 // pin indices 1029 pinIndices(start, length); 1030 1031 // find the last occurrence of c 1032 const UChar *array = getArrayStart(); 1033 const UChar *match = u_memrchr32(array + start, c, length); 1034 if(match == NULL) { 1035 return -1; 1036 } else { 1037 return (int32_t)(match - array); 1038 } 1039} 1040 1041//======================================== 1042// Write implementation 1043//======================================== 1044 1045UnicodeString& 1046UnicodeString::findAndReplace(int32_t start, 1047 int32_t length, 1048 const UnicodeString& oldText, 1049 int32_t oldStart, 1050 int32_t oldLength, 1051 const UnicodeString& newText, 1052 int32_t newStart, 1053 int32_t newLength) 1054{ 1055 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1056 return *this; 1057 } 1058 1059 pinIndices(start, length); 1060 oldText.pinIndices(oldStart, oldLength); 1061 newText.pinIndices(newStart, newLength); 1062 1063 if(oldLength == 0) { 1064 return *this; 1065 } 1066 1067 while(length > 0 && length >= oldLength) { 1068 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1069 if(pos < 0) { 1070 // no more oldText's here: done 1071 break; 1072 } else { 1073 // we found oldText, replace it by newText and go beyond it 1074 replace(pos, oldLength, newText, newStart, newLength); 1075 length -= pos + oldLength - start; 1076 start = pos + newLength; 1077 } 1078 } 1079 1080 return *this; 1081} 1082 1083 1084void 1085UnicodeString::setToBogus() 1086{ 1087 releaseArray(); 1088 1089 fUnion.fFields.fLengthAndFlags = kIsBogus; 1090 fUnion.fFields.fArray = 0; 1091 fUnion.fFields.fCapacity = 0; 1092} 1093 1094// turn a bogus string into an empty one 1095void 1096UnicodeString::unBogus() { 1097 if(fUnion.fFields.fLengthAndFlags & kIsBogus) { 1098 setToEmpty(); 1099 } 1100} 1101 1102const UChar * 1103UnicodeString::getTerminatedBuffer() { 1104 if(!isWritable()) { 1105 return 0; 1106 } 1107 UChar *array = getArrayStart(); 1108 int32_t len = length(); 1109 if(len < getCapacity()) { 1110 if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) { 1111 // If len<capacity on a read-only alias, then array[len] is 1112 // either the original NUL (if constructed with (TRUE, s, length)) 1113 // or one of the original string contents characters (if later truncated), 1114 // therefore we can assume that array[len] is initialized memory. 1115 if(array[len] == 0) { 1116 return array; 1117 } 1118 } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) { 1119 // kRefCounted: Do not write the NUL if the buffer is shared. 1120 // That is mostly safe, except when the length of one copy was modified 1121 // without copy-on-write, e.g., via truncate(newLength) or remove(void). 1122 // Then the NUL would be written into the middle of another copy's string. 1123 1124 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL. 1125 // Do not test if there is a NUL already because it might be uninitialized memory. 1126 // (That would be safe, but tools like valgrind & Purify would complain.) 1127 array[len] = 0; 1128 return array; 1129 } 1130 } 1131 if(cloneArrayIfNeeded(len+1)) { 1132 array = getArrayStart(); 1133 array[len] = 0; 1134 return array; 1135 } else { 1136 return NULL; 1137 } 1138} 1139 1140// setTo() analogous to the readonly-aliasing constructor with the same signature 1141UnicodeString & 1142UnicodeString::setTo(UBool isTerminated, 1143 const UChar *text, 1144 int32_t textLength) 1145{ 1146 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { 1147 // do not modify a string that has an "open" getBuffer(minCapacity) 1148 return *this; 1149 } 1150 1151 if(text == NULL) { 1152 // treat as an empty string, do not alias 1153 releaseArray(); 1154 setToEmpty(); 1155 return *this; 1156 } 1157 1158 if( textLength < -1 || 1159 (textLength == -1 && !isTerminated) || 1160 (textLength >= 0 && isTerminated && text[textLength] != 0) 1161 ) { 1162 setToBogus(); 1163 return *this; 1164 } 1165 1166 releaseArray(); 1167 1168 if(textLength == -1) { 1169 // text is terminated, or else it would have failed the above test 1170 textLength = u_strlen(text); 1171 } 1172 fUnion.fFields.fLengthAndFlags = kReadonlyAlias; 1173 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1174 return *this; 1175} 1176 1177// setTo() analogous to the writable-aliasing constructor with the same signature 1178UnicodeString & 1179UnicodeString::setTo(UChar *buffer, 1180 int32_t buffLength, 1181 int32_t buffCapacity) { 1182 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { 1183 // do not modify a string that has an "open" getBuffer(minCapacity) 1184 return *this; 1185 } 1186 1187 if(buffer == NULL) { 1188 // treat as an empty string, do not alias 1189 releaseArray(); 1190 setToEmpty(); 1191 return *this; 1192 } 1193 1194 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1195 setToBogus(); 1196 return *this; 1197 } else if(buffLength == -1) { 1198 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1199 const UChar *p = buffer, *limit = buffer + buffCapacity; 1200 while(p != limit && *p != 0) { 1201 ++p; 1202 } 1203 buffLength = (int32_t)(p - buffer); 1204 } 1205 1206 releaseArray(); 1207 1208 fUnion.fFields.fLengthAndFlags = kWritableAlias; 1209 setArray(buffer, buffLength, buffCapacity); 1210 return *this; 1211} 1212 1213UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1214 unBogus(); 1215 int32_t length = utf8.length(); 1216 int32_t capacity; 1217 // The UTF-16 string will be at most as long as the UTF-8 string. 1218 if(length <= US_STACKBUF_SIZE) { 1219 capacity = US_STACKBUF_SIZE; 1220 } else { 1221 capacity = length + 1; // +1 for the terminating NUL. 1222 } 1223 UChar *utf16 = getBuffer(capacity); 1224 int32_t length16; 1225 UErrorCode errorCode = U_ZERO_ERROR; 1226 u_strFromUTF8WithSub(utf16, getCapacity(), &length16, 1227 utf8.data(), length, 1228 0xfffd, // Substitution character. 1229 NULL, // Don't care about number of substitutions. 1230 &errorCode); 1231 releaseBuffer(length16); 1232 if(U_FAILURE(errorCode)) { 1233 setToBogus(); 1234 } 1235 return *this; 1236} 1237 1238UnicodeString& 1239UnicodeString::setCharAt(int32_t offset, 1240 UChar c) 1241{ 1242 int32_t len = length(); 1243 if(cloneArrayIfNeeded() && len > 0) { 1244 if(offset < 0) { 1245 offset = 0; 1246 } else if(offset >= len) { 1247 offset = len - 1; 1248 } 1249 1250 getArrayStart()[offset] = c; 1251 } 1252 return *this; 1253} 1254 1255UnicodeString& 1256UnicodeString::replace(int32_t start, 1257 int32_t _length, 1258 UChar32 srcChar) { 1259 UChar buffer[U16_MAX_LENGTH]; 1260 int32_t count = 0; 1261 UBool isError = FALSE; 1262 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 1263 // We test isError so that the compiler does not complain that we don't. 1264 // If isError (srcChar is not a valid code point) then count==0 which means 1265 // we remove the source segment rather than replacing it with srcChar. 1266 return doReplace(start, _length, buffer, 0, isError ? 0 : count); 1267} 1268 1269UnicodeString& 1270UnicodeString::append(UChar32 srcChar) { 1271 UChar buffer[U16_MAX_LENGTH]; 1272 int32_t _length = 0; 1273 UBool isError = FALSE; 1274 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 1275 // We test isError so that the compiler does not complain that we don't. 1276 // If isError then _length==0 which turns the doReplace() into a no-op anyway. 1277 return isError ? *this : doReplace(length(), 0, buffer, 0, _length); 1278} 1279 1280UnicodeString& 1281UnicodeString::doReplace( int32_t start, 1282 int32_t length, 1283 const UnicodeString& src, 1284 int32_t srcStart, 1285 int32_t srcLength) 1286{ 1287 if(!src.isBogus()) { 1288 // pin the indices to legal values 1289 src.pinIndices(srcStart, srcLength); 1290 1291 // get the characters from src 1292 // and replace the range in ourselves with them 1293 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); 1294 } else { 1295 // remove the range 1296 return doReplace(start, length, 0, 0, 0); 1297 } 1298} 1299 1300UnicodeString& 1301UnicodeString::doReplace(int32_t start, 1302 int32_t length, 1303 const UChar *srcChars, 1304 int32_t srcStart, 1305 int32_t srcLength) 1306{ 1307 if(!isWritable()) { 1308 return *this; 1309 } 1310 1311 int32_t oldLength = this->length(); 1312 1313 // optimize (read-only alias).remove(0, start) and .remove(start, end) 1314 if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) { 1315 if(start == 0) { 1316 // remove prefix by adjusting the array pointer 1317 pinIndex(length); 1318 fUnion.fFields.fArray += length; 1319 fUnion.fFields.fCapacity -= length; 1320 setLength(oldLength - length); 1321 return *this; 1322 } else { 1323 pinIndex(start); 1324 if(length >= (oldLength - start)) { 1325 // remove suffix by reducing the length (like truncate()) 1326 setLength(start); 1327 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1328 return *this; 1329 } 1330 } 1331 } 1332 1333 if(srcChars == 0) { 1334 srcStart = srcLength = 0; 1335 } else if(srcLength < 0) { 1336 // get the srcLength if necessary 1337 srcLength = u_strlen(srcChars + srcStart); 1338 } 1339 1340 // calculate the size of the string after the replace 1341 int32_t newLength; 1342 1343 // optimize append() onto a large-enough, owned string 1344 if(start >= oldLength) { 1345 if(srcLength == 0) { 1346 return *this; 1347 } 1348 newLength = oldLength + srcLength; 1349 if(newLength <= getCapacity() && isBufferWritable()) { 1350 UChar *oldArray = getArrayStart(); 1351 // Do not copy characters when 1352 // UChar *buffer=str.getAppendBuffer(...); 1353 // is followed by 1354 // str.append(buffer, length); 1355 // or 1356 // str.appendString(buffer, length) 1357 // or similar. 1358 if(srcChars + srcStart != oldArray + start || start > oldLength) { 1359 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength); 1360 } 1361 setLength(newLength); 1362 return *this; 1363 } else { 1364 // pin the indices to legal values 1365 start = oldLength; 1366 length = 0; 1367 } 1368 } else { 1369 // pin the indices to legal values 1370 pinIndices(start, length); 1371 1372 newLength = oldLength - length + srcLength; 1373 } 1374 1375 // the following may change fArray but will not copy the current contents; 1376 // therefore we need to keep the current fArray 1377 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1378 UChar *oldArray; 1379 if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { 1380 // copy the stack buffer contents because it will be overwritten with 1381 // fUnion.fFields values 1382 u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); 1383 oldArray = oldStackBuffer; 1384 } else { 1385 oldArray = getArrayStart(); 1386 } 1387 1388 // clone our array and allocate a bigger array if needed 1389 int32_t *bufferToDelete = 0; 1390 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, 1391 FALSE, &bufferToDelete) 1392 ) { 1393 return *this; 1394 } 1395 1396 // now do the replace 1397 1398 UChar *newArray = getArrayStart(); 1399 if(newArray != oldArray) { 1400 // if fArray changed, then we need to copy everything except what will change 1401 us_arrayCopy(oldArray, 0, newArray, 0, start); 1402 us_arrayCopy(oldArray, start + length, 1403 newArray, start + srcLength, 1404 oldLength - (start + length)); 1405 } else if(length != srcLength) { 1406 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1407 us_arrayCopy(oldArray, start + length, 1408 newArray, start + srcLength, 1409 oldLength - (start + length)); 1410 } 1411 1412 // now fill in the hole with the new string 1413 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1414 1415 setLength(newLength); 1416 1417 // delayed delete in case srcChars == fArray when we started, and 1418 // to keep oldArray alive for the above operations 1419 if (bufferToDelete) { 1420 uprv_free(bufferToDelete); 1421 } 1422 1423 return *this; 1424} 1425 1426/** 1427 * Replaceable API 1428 */ 1429void 1430UnicodeString::handleReplaceBetween(int32_t start, 1431 int32_t limit, 1432 const UnicodeString& text) { 1433 replaceBetween(start, limit, text); 1434} 1435 1436/** 1437 * Replaceable API 1438 */ 1439void 1440UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1441 if (limit <= start) { 1442 return; // Nothing to do; avoid bogus malloc call 1443 } 1444 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1445 // Check to make sure text is not null. 1446 if (text != NULL) { 1447 extractBetween(start, limit, text, 0); 1448 insert(dest, text, 0, limit - start); 1449 uprv_free(text); 1450 } 1451} 1452 1453/** 1454 * Replaceable API 1455 * 1456 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1457 * so we implement this function here. 1458 */ 1459UBool Replaceable::hasMetaData() const { 1460 return TRUE; 1461} 1462 1463/** 1464 * Replaceable API 1465 */ 1466UBool UnicodeString::hasMetaData() const { 1467 return FALSE; 1468} 1469 1470UnicodeString& 1471UnicodeString::doReverse(int32_t start, int32_t length) { 1472 if(length <= 1 || !cloneArrayIfNeeded()) { 1473 return *this; 1474 } 1475 1476 // pin the indices to legal values 1477 pinIndices(start, length); 1478 if(length <= 1) { // pinIndices() might have shrunk the length 1479 return *this; 1480 } 1481 1482 UChar *left = getArrayStart() + start; 1483 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) 1484 UChar swap; 1485 UBool hasSupplementary = FALSE; 1486 1487 // Before the loop we know left<right because length>=2. 1488 do { 1489 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); 1490 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); 1491 *right-- = swap; 1492 } while(left < right); 1493 // Make sure to test the middle code unit of an odd-length string. 1494 // Redundant if the length is even. 1495 hasSupplementary |= (UBool)U16_IS_LEAD(*left); 1496 1497 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1498 if(hasSupplementary) { 1499 UChar swap2; 1500 1501 left = getArrayStart() + start; 1502 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1503 while(left < right) { 1504 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { 1505 *left++ = swap2; 1506 *left++ = swap; 1507 } else { 1508 ++left; 1509 } 1510 } 1511 } 1512 1513 return *this; 1514} 1515 1516UBool 1517UnicodeString::padLeading(int32_t targetLength, 1518 UChar padChar) 1519{ 1520 int32_t oldLength = length(); 1521 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1522 return FALSE; 1523 } else { 1524 // move contents up by padding width 1525 UChar *array = getArrayStart(); 1526 int32_t start = targetLength - oldLength; 1527 us_arrayCopy(array, 0, array, start, oldLength); 1528 1529 // fill in padding character 1530 while(--start >= 0) { 1531 array[start] = padChar; 1532 } 1533 setLength(targetLength); 1534 return TRUE; 1535 } 1536} 1537 1538UBool 1539UnicodeString::padTrailing(int32_t targetLength, 1540 UChar padChar) 1541{ 1542 int32_t oldLength = length(); 1543 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1544 return FALSE; 1545 } else { 1546 // fill in padding character 1547 UChar *array = getArrayStart(); 1548 int32_t length = targetLength; 1549 while(--length >= oldLength) { 1550 array[length] = padChar; 1551 } 1552 setLength(targetLength); 1553 return TRUE; 1554 } 1555} 1556 1557//======================================== 1558// Hashing 1559//======================================== 1560int32_t 1561UnicodeString::doHashCode() const 1562{ 1563 /* Delegate hash computation to uhash. This makes UnicodeString 1564 * hashing consistent with UChar* hashing. */ 1565 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); 1566 if (hashCode == kInvalidHashCode) { 1567 hashCode = kEmptyHashCode; 1568 } 1569 return hashCode; 1570} 1571 1572//======================================== 1573// External Buffer 1574//======================================== 1575 1576UChar * 1577UnicodeString::getBuffer(int32_t minCapacity) { 1578 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1579 fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; 1580 setZeroLength(); 1581 return getArrayStart(); 1582 } else { 1583 return 0; 1584 } 1585} 1586 1587void 1588UnicodeString::releaseBuffer(int32_t newLength) { 1589 if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) { 1590 // set the new fLength 1591 int32_t capacity=getCapacity(); 1592 if(newLength==-1) { 1593 // the new length is the string length, capped by fCapacity 1594 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1595 while(p<limit && *p!=0) { 1596 ++p; 1597 } 1598 newLength=(int32_t)(p-array); 1599 } else if(newLength>capacity) { 1600 newLength=capacity; 1601 } 1602 setLength(newLength); 1603 fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer; 1604 } 1605} 1606 1607//======================================== 1608// Miscellaneous 1609//======================================== 1610UBool 1611UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1612 int32_t growCapacity, 1613 UBool doCopyArray, 1614 int32_t **pBufferToDelete, 1615 UBool forceClone) { 1616 // default parameters need to be static, therefore 1617 // the defaults are -1 to have convenience defaults 1618 if(newCapacity == -1) { 1619 newCapacity = getCapacity(); 1620 } 1621 1622 // while a getBuffer(minCapacity) is "open", 1623 // prevent any modifications of the string by returning FALSE here 1624 // if the string is bogus, then only an assignment or similar can revive it 1625 if(!isWritable()) { 1626 return FALSE; 1627 } 1628 1629 /* 1630 * We need to make a copy of the array if 1631 * the buffer is read-only, or 1632 * the buffer is refCounted (shared), and refCount>1, or 1633 * the buffer is too small. 1634 * Return FALSE if memory could not be allocated. 1635 */ 1636 if(forceClone || 1637 fUnion.fFields.fLengthAndFlags & kBufferIsReadonly || 1638 (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) || 1639 newCapacity > getCapacity() 1640 ) { 1641 // check growCapacity for default value and use of the stack buffer 1642 if(growCapacity < 0) { 1643 growCapacity = newCapacity; 1644 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1645 growCapacity = US_STACKBUF_SIZE; 1646 } 1647 1648 // save old values 1649 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1650 UChar *oldArray; 1651 int32_t oldLength = length(); 1652 int16_t flags = fUnion.fFields.fLengthAndFlags; 1653 1654 if(flags&kUsingStackBuffer) { 1655 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ 1656 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1657 // copy the stack buffer contents because it will be overwritten with 1658 // fUnion.fFields values 1659 us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength); 1660 oldArray = oldStackBuffer; 1661 } else { 1662 oldArray = NULL; // no need to copy from the stack buffer to itself 1663 } 1664 } else { 1665 oldArray = fUnion.fFields.fArray; 1666 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ 1667 } 1668 1669 // allocate a new array 1670 if(allocate(growCapacity) || 1671 (newCapacity < growCapacity && allocate(newCapacity)) 1672 ) { 1673 if(doCopyArray) { 1674 // copy the contents 1675 // do not copy more than what fits - it may be smaller than before 1676 int32_t minLength = oldLength; 1677 newCapacity = getCapacity(); 1678 if(newCapacity < minLength) { 1679 minLength = newCapacity; 1680 } 1681 if(oldArray != NULL) { 1682 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1683 } 1684 setLength(minLength); 1685 } else { 1686 setZeroLength(); 1687 } 1688 1689 // release the old array 1690 if(flags & kRefCounted) { 1691 // the array is refCounted; decrement and release if 0 1692 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); 1693 if(umtx_atomic_dec(pRefCount) == 0) { 1694 if(pBufferToDelete == 0) { 1695 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t 1696 // is defined as volatile. (Volatile has useful non-standard behavior 1697 // with this compiler.) 1698 uprv_free((void *)pRefCount); 1699 } else { 1700 // the caller requested to delete it himself 1701 *pBufferToDelete = (int32_t *)pRefCount; 1702 } 1703 } 1704 } 1705 } else { 1706 // not enough memory for growCapacity and not even for the smaller newCapacity 1707 // reset the old values for setToBogus() to release the array 1708 if(!(flags&kUsingStackBuffer)) { 1709 fUnion.fFields.fArray = oldArray; 1710 } 1711 fUnion.fFields.fLengthAndFlags = flags; 1712 setToBogus(); 1713 return FALSE; 1714 } 1715 } 1716 return TRUE; 1717} 1718 1719// UnicodeStringAppendable ------------------------------------------------- *** 1720 1721UnicodeStringAppendable::~UnicodeStringAppendable() {} 1722 1723UBool 1724UnicodeStringAppendable::appendCodeUnit(UChar c) { 1725 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable(); 1726} 1727 1728UBool 1729UnicodeStringAppendable::appendCodePoint(UChar32 c) { 1730 UChar buffer[U16_MAX_LENGTH]; 1731 int32_t cLength = 0; 1732 UBool isError = FALSE; 1733 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); 1734 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable(); 1735} 1736 1737UBool 1738UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { 1739 return str.doReplace(str.length(), 0, s, 0, length).isWritable(); 1740} 1741 1742UBool 1743UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { 1744 return str.cloneArrayIfNeeded(str.length() + appendCapacity); 1745} 1746 1747UChar * 1748UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, 1749 int32_t desiredCapacityHint, 1750 UChar *scratch, int32_t scratchCapacity, 1751 int32_t *resultCapacity) { 1752 if(minCapacity < 1 || scratchCapacity < minCapacity) { 1753 *resultCapacity = 0; 1754 return NULL; 1755 } 1756 int32_t oldLength = str.length(); 1757 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { 1758 *resultCapacity = str.getCapacity() - oldLength; 1759 return str.getArrayStart() + oldLength; 1760 } 1761 *resultCapacity = scratchCapacity; 1762 return scratch; 1763} 1764 1765U_NAMESPACE_END 1766 1767U_NAMESPACE_USE 1768 1769U_CAPI int32_t U_EXPORT2 1770uhash_hashUnicodeString(const UElement key) { 1771 const UnicodeString *str = (const UnicodeString*) key.pointer; 1772 return (str == NULL) ? 0 : str->hashCode(); 1773} 1774 1775// Moved here from uhash_us.cpp so that using a UVector of UnicodeString* 1776// does not depend on hashtable code. 1777U_CAPI UBool U_EXPORT2 1778uhash_compareUnicodeString(const UElement key1, const UElement key2) { 1779 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 1780 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 1781 if (str1 == str2) { 1782 return TRUE; 1783 } 1784 if (str1 == NULL || str2 == NULL) { 1785 return FALSE; 1786 } 1787 return *str1 == *str2; 1788} 1789 1790#ifdef U_STATIC_IMPLEMENTATION 1791/* 1792This should never be called. It is defined here to make sure that the 1793virtual vector deleting destructor is defined within unistr.cpp. 1794The vector deleting destructor is already a part of UObject, 1795but defining it here makes sure that it is included with this object file. 1796This makes sure that static library dependencies are kept to a minimum. 1797*/ 1798static void uprv_UnicodeStringDummy(void) { 1799 delete [] (new UnicodeString[2]); 1800} 1801#endif 1802