1/* 2********************************************************************** 3* Copyright (C) 1998-2012, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* 7* File unistr.h 8* 9* Modification History: 10* 11* Date Name Description 12* 09/25/98 stephen Creation. 13* 11/11/98 stephen Changed per 11/9 code review. 14* 04/20/99 stephen Overhauled per 4/16 code review. 15* 11/18/99 aliu Made to inherit from Replaceable. Added method 16* handleReplaceBetween(); other methods unchanged. 17* 06/25/01 grhoten Remove dependency on iostream. 18****************************************************************************** 19*/ 20 21#ifndef UNISTR_H 22#define UNISTR_H 23 24/** 25 * \file 26 * \brief C++ API: Unicode String 27 */ 28 29#include "unicode/utypes.h" 30#include "unicode/rep.h" 31#include "unicode/std_string.h" 32#include "unicode/stringpiece.h" 33#include "unicode/bytestream.h" 34#include "unicode/ucasemap.h" 35 36struct UConverter; // unicode/ucnv.h 37class StringThreadTest; 38 39#ifndef U_COMPARE_CODE_POINT_ORDER 40/* see also ustring.h and unorm.h */ 41/** 42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 43 * Compare strings in code point order instead of code unit order. 44 * @stable ICU 2.2 45 */ 46#define U_COMPARE_CODE_POINT_ORDER 0x8000 47#endif 48 49#ifndef USTRING_H 50/** 51 * \ingroup ustring_ustrlen 52 */ 53U_STABLE int32_t U_EXPORT2 54u_strlen(const UChar *s); 55#endif 56 57/** 58 * \def U_STRING_CASE_MAPPER_DEFINED 59 * @internal 60 */ 61 62#ifndef U_STRING_CASE_MAPPER_DEFINED 63#define U_STRING_CASE_MAPPER_DEFINED 64 65/** 66 * Internal string case mapping function type. 
67 * @internal 68 */ 69typedef int32_t U_CALLCONV 70UStringCaseMapper(const UCaseMap *csm, 71 UChar *dest, int32_t destCapacity, 72 const UChar *src, int32_t srcLength, 73 UErrorCode *pErrorCode); 74 75#endif 76 77U_NAMESPACE_BEGIN 78 79class BreakIterator; // unicode/brkiter.h 80class Locale; // unicode/locid.h 81class StringCharacterIterator; 82class UnicodeStringAppendable; // unicode/appendable.h 83 84/* The <iostream> include has been moved to unicode/ustream.h */ 85 86/** 87 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 88 * which constructs a Unicode string from an invariant-character char * string. 89 * About invariant characters see utypes.h. 90 * This constructor has no runtime dependency on conversion code and is 91 * therefore recommended over ones taking a charset name string 92 * (where the empty string "" indicates invariant-character conversion). 93 * 94 * @stable ICU 3.2 95 */ 96#define US_INV icu::UnicodeString::kInvariant 97 98/** 99 * Unicode String literals in C++. 100 * Dependent on the platform properties, different UnicodeString 101 * constructors should be used to create a UnicodeString object from 102 * a string literal. 103 * The macros are defined for maximum performance. 104 * They work only for strings that contain "invariant characters", i.e., 105 * only latin letters, digits, and some punctuation. 106 * See utypes.h for details. 107 * 108 * The string parameter must be a C string literal. 109 * The length of the string, not including the terminating 110 * <code>NUL</code>, must be specified as a constant. 111 * The U_STRING_DECL macro should be invoked exactly once for one 112 * such string variable before it is used. 
113 * @stable ICU 2.0 114 */ 115#if defined(U_DECLARE_UTF16) 116# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 117#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 118# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 119#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 120# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 121#else 122# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 123#endif 124 125/** 126 * Unicode String literals in C++. 127 * Dependent on the platform properties, different UnicodeString 128 * constructors should be used to create a UnicodeString object from 129 * a string literal. 130 * The macros are defined for improved performance. 131 * They work only for strings that contain "invariant characters", i.e., 132 * only latin letters, digits, and some punctuation. 133 * See utypes.h for details. 134 * 135 * The string parameter must be a C string literal. 136 * @stable ICU 2.0 137 */ 138#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 139 140/** 141 * \def UNISTR_FROM_CHAR_EXPLICIT 142 * This can be defined to be empty or "explicit". 143 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) 144 * constructors are marked as explicit, preventing their inadvertent use. 145 * @draft ICU 49 146 */ 147#ifndef UNISTR_FROM_CHAR_EXPLICIT 148# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 149 // Auto-"explicit" in ICU library code. 150# define UNISTR_FROM_CHAR_EXPLICIT explicit 151# else 152 // Empty by default for source code compatibility. 
153# define UNISTR_FROM_CHAR_EXPLICIT 154# endif 155#endif 156 157/** 158 * \def UNISTR_FROM_STRING_EXPLICIT 159 * This can be defined to be empty or "explicit". 160 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) 161 * constructors are marked as explicit, preventing their inadvertent use. 162 * 163 * In particular, this helps prevent accidentally depending on ICU conversion code 164 * by passing a string literal into an API with a const UnicodeString & parameter. 165 * @draft ICU 49 166 */ 167#ifndef UNISTR_FROM_STRING_EXPLICIT 168# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 169 // Auto-"explicit" in ICU library code. 170# define UNISTR_FROM_STRING_EXPLICIT explicit 171# else 172 // Empty by default for source code compatibility. 173# define UNISTR_FROM_STRING_EXPLICIT 174# endif 175#endif 176 177/** 178 * UnicodeString is a string class that stores Unicode characters directly and provides 179 * similar functionality as the Java String and StringBuffer classes. 180 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 181 * 182 * The UnicodeString class is not suitable for subclassing. 183 * 184 * <p>For an overview of Unicode strings in C and C++ see the 185 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 186 * 187 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 188 * A Unicode character may be stored with either one code unit 189 * (the most common case) or with a matched pair of special code units 190 * ("surrogates"). The data type for code units is UChar. 191 * For single-character handling, a Unicode character code <em>point</em> is a value 192 * in the range 0..0x10ffff. 
 * ICU uses the UChar32 type for code points.</p>
 *
 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
 * This is the same as with multi-byte char* strings in traditional string handling.
 * Operations on partial strings typically do not test for code point boundaries.
 * If necessary, the user needs to take care of such boundaries by testing for the code unit
 * values or by using functions like
 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
 *
 * UnicodeString methods are more lenient with regard to input parameter values
 * than other ICU APIs. In particular:
 * - If indexes are out of bounds for a UnicodeString object
 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
 * - If primitive string pointer values (e.g., const UChar * or char *)
 *   for input strings are NULL, then those input string parameters are treated
 *   as if they pointed to an empty string.
 *   However, this is <em>not</em> the case for char * parameters for charset names
 *   or other IDs.
 * - Most UnicodeString methods do not take a UErrorCode parameter because
 *   there are usually very few opportunities for failure other than a shortage
 *   of memory, error codes in low-level C++ string methods would be inconvenient,
 *   and the error code as the last parameter (ICU convention) would prevent
 *   the use of default parameter values.
 *   Instead, such methods set the UnicodeString into a "bogus" state
 *   (see isBogus()) if an error occurs.
 *
 * In string comparisons, two UnicodeString objects that are both "bogus"
 * compare equal (to be transitive and prevent endless loops in sorting),
 * and a "bogus" string compares less than any non-"bogus" one.
 *
 * Const UnicodeString methods are thread-safe.
Multiple threads can use 224 * const methods on the same UnicodeString object simultaneously, 225 * but non-const methods must not be called concurrently (in multiple threads) 226 * with any other (const or non-const) methods. 227 * 228 * Similarly, const UnicodeString & parameters are thread-safe. 229 * One object may be passed in as such a parameter concurrently in multiple threads. 230 * This includes the const UnicodeString & parameters for 231 * copy construction, assignment, and cloning. 232 * 233 * <p>UnicodeString uses several storage methods. 234 * String contents can be stored inside the UnicodeString object itself, 235 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 236 * Most of this is done transparently, but careful aliasing in particular provides 237 * significant performance improvements. 238 * Also, the internal buffer is accessible via special functions. 239 * For details see the 240 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 241 * 242 * @see utf.h 243 * @see CharacterIterator 244 * @stable ICU 2.0 245 */ 246class U_COMMON_API UnicodeString : public Replaceable 247{ 248public: 249 250 /** 251 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 252 * which constructs a Unicode string from an invariant-character char * string. 253 * Use the macro US_INV instead of the full qualification for this value. 254 * 255 * @see US_INV 256 * @stable ICU 3.2 257 */ 258 enum EInvariant { 259 /** 260 * @see EInvariant 261 * @stable ICU 3.2 262 */ 263 kInvariant 264 }; 265 266 //======================================== 267 // Read-only operations 268 //======================================== 269 270 /* Comparison - bitwise only - for international comparison use collation */ 271 272 /** 273 * Equality operator. Performs only bitwise comparison. 274 * @param text The UnicodeString to compare to this one. 
275 * @return TRUE if <TT>text</TT> contains the same characters as this one, 276 * FALSE otherwise. 277 * @stable ICU 2.0 278 */ 279 inline UBool operator== (const UnicodeString& text) const; 280 281 /** 282 * Inequality operator. Performs only bitwise comparison. 283 * @param text The UnicodeString to compare to this one. 284 * @return FALSE if <TT>text</TT> contains the same characters as this one, 285 * TRUE otherwise. 286 * @stable ICU 2.0 287 */ 288 inline UBool operator!= (const UnicodeString& text) const; 289 290 /** 291 * Greater than operator. Performs only bitwise comparison. 292 * @param text The UnicodeString to compare to this one. 293 * @return TRUE if the characters in this are bitwise 294 * greater than the characters in <code>text</code>, FALSE otherwise 295 * @stable ICU 2.0 296 */ 297 inline UBool operator> (const UnicodeString& text) const; 298 299 /** 300 * Less than operator. Performs only bitwise comparison. 301 * @param text The UnicodeString to compare to this one. 302 * @return TRUE if the characters in this are bitwise 303 * less than the characters in <code>text</code>, FALSE otherwise 304 * @stable ICU 2.0 305 */ 306 inline UBool operator< (const UnicodeString& text) const; 307 308 /** 309 * Greater than or equal operator. Performs only bitwise comparison. 310 * @param text The UnicodeString to compare to this one. 311 * @return TRUE if the characters in this are bitwise 312 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 313 * @stable ICU 2.0 314 */ 315 inline UBool operator>= (const UnicodeString& text) const; 316 317 /** 318 * Less than or equal operator. Performs only bitwise comparison. 319 * @param text The UnicodeString to compare to this one. 
320 * @return TRUE if the characters in this are bitwise 321 * less than or equal to the characters in <code>text</code>, FALSE otherwise 322 * @stable ICU 2.0 323 */ 324 inline UBool operator<= (const UnicodeString& text) const; 325 326 /** 327 * Compare the characters bitwise in this UnicodeString to 328 * the characters in <code>text</code>. 329 * @param text The UnicodeString to compare to this one. 330 * @return The result of bitwise character comparison: 0 if this 331 * contains the same characters as <code>text</code>, -1 if the characters in 332 * this are bitwise less than the characters in <code>text</code>, +1 if the 333 * characters in this are bitwise greater than the characters 334 * in <code>text</code>. 335 * @stable ICU 2.0 336 */ 337 inline int8_t compare(const UnicodeString& text) const; 338 339 /** 340 * Compare the characters bitwise in the range 341 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 342 * in <TT>text</TT> 343 * @param start the offset at which the compare operation begins 344 * @param length the number of characters of text to compare. 345 * @param text the other text to be compared against this string. 346 * @return The result of bitwise character comparison: 0 if this 347 * contains the same characters as <code>text</code>, -1 if the characters in 348 * this are bitwise less than the characters in <code>text</code>, +1 if the 349 * characters in this are bitwise greater than the characters 350 * in <code>text</code>. 351 * @stable ICU 2.0 352 */ 353 inline int8_t compare(int32_t start, 354 int32_t length, 355 const UnicodeString& text) const; 356 357 /** 358 * Compare the characters bitwise in the range 359 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 360 * in <TT>srcText</TT> in the range 361 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 362 * @param start the offset at which the compare operation begins 363 * @param length the number of characters in this to compare. 
364 * @param srcText the text to be compared 365 * @param srcStart the offset into <TT>srcText</TT> to start comparison 366 * @param srcLength the number of characters in <TT>src</TT> to compare 367 * @return The result of bitwise character comparison: 0 if this 368 * contains the same characters as <code>srcText</code>, -1 if the characters in 369 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 370 * characters in this are bitwise greater than the characters 371 * in <code>srcText</code>. 372 * @stable ICU 2.0 373 */ 374 inline int8_t compare(int32_t start, 375 int32_t length, 376 const UnicodeString& srcText, 377 int32_t srcStart, 378 int32_t srcLength) const; 379 380 /** 381 * Compare the characters bitwise in this UnicodeString with the first 382 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 383 * @param srcChars The characters to compare to this UnicodeString. 384 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 385 * @return The result of bitwise character comparison: 0 if this 386 * contains the same characters as <code>srcChars</code>, -1 if the characters in 387 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 388 * characters in this are bitwise greater than the characters 389 * in <code>srcChars</code>. 390 * @stable ICU 2.0 391 */ 392 inline int8_t compare(const UChar *srcChars, 393 int32_t srcLength) const; 394 395 /** 396 * Compare the characters bitwise in the range 397 * [<TT>start</TT>, <TT>start + length</TT>) with the first 398 * <TT>length</TT> characters in <TT>srcChars</TT> 399 * @param start the offset at which the compare operation begins 400 * @param length the number of characters to compare. 
401 * @param srcChars the characters to be compared 402 * @return The result of bitwise character comparison: 0 if this 403 * contains the same characters as <code>srcChars</code>, -1 if the characters in 404 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 405 * characters in this are bitwise greater than the characters 406 * in <code>srcChars</code>. 407 * @stable ICU 2.0 408 */ 409 inline int8_t compare(int32_t start, 410 int32_t length, 411 const UChar *srcChars) const; 412 413 /** 414 * Compare the characters bitwise in the range 415 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 416 * in <TT>srcChars</TT> in the range 417 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 418 * @param start the offset at which the compare operation begins 419 * @param length the number of characters in this to compare 420 * @param srcChars the characters to be compared 421 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 422 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 423 * @return The result of bitwise character comparison: 0 if this 424 * contains the same characters as <code>srcChars</code>, -1 if the characters in 425 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 426 * characters in this are bitwise greater than the characters 427 * in <code>srcChars</code>. 428 * @stable ICU 2.0 429 */ 430 inline int8_t compare(int32_t start, 431 int32_t length, 432 const UChar *srcChars, 433 int32_t srcStart, 434 int32_t srcLength) const; 435 436 /** 437 * Compare the characters bitwise in the range 438 * [<TT>start</TT>, <TT>limit</TT>) with the characters 439 * in <TT>srcText</TT> in the range 440 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
441 * @param start the offset at which the compare operation begins 442 * @param limit the offset immediately following the compare operation 443 * @param srcText the text to be compared 444 * @param srcStart the offset into <TT>srcText</TT> to start comparison 445 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 446 * @return The result of bitwise character comparison: 0 if this 447 * contains the same characters as <code>srcText</code>, -1 if the characters in 448 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 449 * characters in this are bitwise greater than the characters 450 * in <code>srcText</code>. 451 * @stable ICU 2.0 452 */ 453 inline int8_t compareBetween(int32_t start, 454 int32_t limit, 455 const UnicodeString& srcText, 456 int32_t srcStart, 457 int32_t srcLimit) const; 458 459 /** 460 * Compare two Unicode strings in code point order. 461 * The result may be different from the results of compare(), operator<, etc. 462 * if supplementary characters are present: 463 * 464 * In UTF-16, supplementary characters (with code points U+10000 and above) are 465 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 466 * which means that they compare as less than some other BMP characters like U+feff. 467 * This function compares Unicode strings in code point order. 468 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 469 * 470 * @param text Another string to compare this one to. 471 * @return a negative/zero/positive integer corresponding to whether 472 * this string is less than/equal to/greater than the second one 473 * in code point order 474 * @stable ICU 2.0 475 */ 476 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 477 478 /** 479 * Compare two Unicode strings in code point order. 480 * The result may be different from the results of compare(), operator<, etc. 
481 * if supplementary characters are present: 482 * 483 * In UTF-16, supplementary characters (with code points U+10000 and above) are 484 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 485 * which means that they compare as less than some other BMP characters like U+feff. 486 * This function compares Unicode strings in code point order. 487 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 488 * 489 * @param start The start offset in this string at which the compare operation begins. 490 * @param length The number of code units from this string to compare. 491 * @param srcText Another string to compare this one to. 492 * @return a negative/zero/positive integer corresponding to whether 493 * this string is less than/equal to/greater than the second one 494 * in code point order 495 * @stable ICU 2.0 496 */ 497 inline int8_t compareCodePointOrder(int32_t start, 498 int32_t length, 499 const UnicodeString& srcText) const; 500 501 /** 502 * Compare two Unicode strings in code point order. 503 * The result may be different from the results of compare(), operator<, etc. 504 * if supplementary characters are present: 505 * 506 * In UTF-16, supplementary characters (with code points U+10000 and above) are 507 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 508 * which means that they compare as less than some other BMP characters like U+feff. 509 * This function compares Unicode strings in code point order. 510 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 511 * 512 * @param start The start offset in this string at which the compare operation begins. 513 * @param length The number of code units from this string to compare. 514 * @param srcText Another string to compare this one to. 
515 * @param srcStart The start offset in that string at which the compare operation begins. 516 * @param srcLength The number of code units from that string to compare. 517 * @return a negative/zero/positive integer corresponding to whether 518 * this string is less than/equal to/greater than the second one 519 * in code point order 520 * @stable ICU 2.0 521 */ 522 inline int8_t compareCodePointOrder(int32_t start, 523 int32_t length, 524 const UnicodeString& srcText, 525 int32_t srcStart, 526 int32_t srcLength) const; 527 528 /** 529 * Compare two Unicode strings in code point order. 530 * The result may be different from the results of compare(), operator<, etc. 531 * if supplementary characters are present: 532 * 533 * In UTF-16, supplementary characters (with code points U+10000 and above) are 534 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 535 * which means that they compare as less than some other BMP characters like U+feff. 536 * This function compares Unicode strings in code point order. 537 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 538 * 539 * @param srcChars A pointer to another string to compare this one to. 540 * @param srcLength The number of code units from that string to compare. 541 * @return a negative/zero/positive integer corresponding to whether 542 * this string is less than/equal to/greater than the second one 543 * in code point order 544 * @stable ICU 2.0 545 */ 546 inline int8_t compareCodePointOrder(const UChar *srcChars, 547 int32_t srcLength) const; 548 549 /** 550 * Compare two Unicode strings in code point order. 551 * The result may be different from the results of compare(), operator<, etc. 552 * if supplementary characters are present: 553 * 554 * In UTF-16, supplementary characters (with code points U+10000 and above) are 555 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 556 * which means that they compare as less than some other BMP characters like U+feff. 557 * This function compares Unicode strings in code point order. 558 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 559 * 560 * @param start The start offset in this string at which the compare operation begins. 561 * @param length The number of code units from this string to compare. 562 * @param srcChars A pointer to another string to compare this one to. 563 * @return a negative/zero/positive integer corresponding to whether 564 * this string is less than/equal to/greater than the second one 565 * in code point order 566 * @stable ICU 2.0 567 */ 568 inline int8_t compareCodePointOrder(int32_t start, 569 int32_t length, 570 const UChar *srcChars) const; 571 572 /** 573 * Compare two Unicode strings in code point order. 574 * The result may be different from the results of compare(), operator<, etc. 575 * if supplementary characters are present: 576 * 577 * In UTF-16, supplementary characters (with code points U+10000 and above) are 578 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 579 * which means that they compare as less than some other BMP characters like U+feff. 580 * This function compares Unicode strings in code point order. 581 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 582 * 583 * @param start The start offset in this string at which the compare operation begins. 584 * @param length The number of code units from this string to compare. 585 * @param srcChars A pointer to another string to compare this one to. 586 * @param srcStart The start offset in that string at which the compare operation begins. 587 * @param srcLength The number of code units from that string to compare. 
588 * @return a negative/zero/positive integer corresponding to whether 589 * this string is less than/equal to/greater than the second one 590 * in code point order 591 * @stable ICU 2.0 592 */ 593 inline int8_t compareCodePointOrder(int32_t start, 594 int32_t length, 595 const UChar *srcChars, 596 int32_t srcStart, 597 int32_t srcLength) const; 598 599 /** 600 * Compare two Unicode strings in code point order. 601 * The result may be different from the results of compare(), operator<, etc. 602 * if supplementary characters are present: 603 * 604 * In UTF-16, supplementary characters (with code points U+10000 and above) are 605 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 606 * which means that they compare as less than some other BMP characters like U+feff. 607 * This function compares Unicode strings in code point order. 608 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 609 * 610 * @param start The start offset in this string at which the compare operation begins. 611 * @param limit The offset after the last code unit from this string to compare. 612 * @param srcText Another string to compare this one to. 613 * @param srcStart The start offset in that string at which the compare operation begins. 614 * @param srcLimit The offset after the last code unit from that string to compare. 615 * @return a negative/zero/positive integer corresponding to whether 616 * this string is less than/equal to/greater than the second one 617 * in code point order 618 * @stable ICU 2.0 619 */ 620 inline int8_t compareCodePointOrderBetween(int32_t start, 621 int32_t limit, 622 const UnicodeString& srcText, 623 int32_t srcStart, 624 int32_t srcLimit) const; 625 626 /** 627 * Compare two strings case-insensitively using full case folding. 628 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
629 * 630 * @param text Another string to compare this one to. 631 * @param options A bit set of options: 632 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 633 * Comparison in code unit order with default case folding. 634 * 635 * - U_COMPARE_CODE_POINT_ORDER 636 * Set to choose code point order instead of code unit order 637 * (see u_strCompare for details). 638 * 639 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 640 * 641 * @return A negative, zero, or positive integer indicating the comparison result. 642 * @stable ICU 2.0 643 */ 644 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 645 646 /** 647 * Compare two strings case-insensitively using full case folding. 648 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 649 * 650 * @param start The start offset in this string at which the compare operation begins. 651 * @param length The number of code units from this string to compare. 652 * @param srcText Another string to compare this one to. 653 * @param options A bit set of options: 654 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 655 * Comparison in code unit order with default case folding. 656 * 657 * - U_COMPARE_CODE_POINT_ORDER 658 * Set to choose code point order instead of code unit order 659 * (see u_strCompare for details). 660 * 661 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 662 * 663 * @return A negative, zero, or positive integer indicating the comparison result. 664 * @stable ICU 2.0 665 */ 666 inline int8_t caseCompare(int32_t start, 667 int32_t length, 668 const UnicodeString& srcText, 669 uint32_t options) const; 670 671 /** 672 * Compare two strings case-insensitively using full case folding. 673 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 674 * 675 * @param start The start offset in this string at which the compare operation begins. 676 * @param length The number of code units from this string to compare. 
677 * @param srcText Another string to compare this one to. 678 * @param srcStart The start offset in that string at which the compare operation begins. 679 * @param srcLength The number of code units from that string to compare. 680 * @param options A bit set of options: 681 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 682 * Comparison in code unit order with default case folding. 683 * 684 * - U_COMPARE_CODE_POINT_ORDER 685 * Set to choose code point order instead of code unit order 686 * (see u_strCompare for details). 687 * 688 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 689 * 690 * @return A negative, zero, or positive integer indicating the comparison result. 691 * @stable ICU 2.0 692 */ 693 inline int8_t caseCompare(int32_t start, 694 int32_t length, 695 const UnicodeString& srcText, 696 int32_t srcStart, 697 int32_t srcLength, 698 uint32_t options) const; 699 700 /** 701 * Compare two strings case-insensitively using full case folding. 702 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 703 * 704 * @param srcChars A pointer to another string to compare this one to. 705 * @param srcLength The number of code units from that string to compare. 706 * @param options A bit set of options: 707 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 708 * Comparison in code unit order with default case folding. 709 * 710 * - U_COMPARE_CODE_POINT_ORDER 711 * Set to choose code point order instead of code unit order 712 * (see u_strCompare for details). 713 * 714 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 715 * 716 * @return A negative, zero, or positive integer indicating the comparison result. 717 * @stable ICU 2.0 718 */ 719 inline int8_t caseCompare(const UChar *srcChars, 720 int32_t srcLength, 721 uint32_t options) const; 722 723 /** 724 * Compare two strings case-insensitively using full case folding. 725 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 
726 * 727 * @param start The start offset in this string at which the compare operation begins. 728 * @param length The number of code units from this string to compare. 729 * @param srcChars A pointer to another string to compare this one to. 730 * @param options A bit set of options: 731 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 732 * Comparison in code unit order with default case folding. 733 * 734 * - U_COMPARE_CODE_POINT_ORDER 735 * Set to choose code point order instead of code unit order 736 * (see u_strCompare for details). 737 * 738 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 739 * 740 * @return A negative, zero, or positive integer indicating the comparison result. 741 * @stable ICU 2.0 742 */ 743 inline int8_t caseCompare(int32_t start, 744 int32_t length, 745 const UChar *srcChars, 746 uint32_t options) const; 747 748 /** 749 * Compare two strings case-insensitively using full case folding. 750 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 751 * 752 * @param start The start offset in this string at which the compare operation begins. 753 * @param length The number of code units from this string to compare. 754 * @param srcChars A pointer to another string to compare this one to. 755 * @param srcStart The start offset in that string at which the compare operation begins. 756 * @param srcLength The number of code units from that string to compare. 757 * @param options A bit set of options: 758 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 759 * Comparison in code unit order with default case folding. 760 * 761 * - U_COMPARE_CODE_POINT_ORDER 762 * Set to choose code point order instead of code unit order 763 * (see u_strCompare for details). 764 * 765 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 766 * 767 * @return A negative, zero, or positive integer indicating the comparison result. 
768 * @stable ICU 2.0 769 */ 770 inline int8_t caseCompare(int32_t start, 771 int32_t length, 772 const UChar *srcChars, 773 int32_t srcStart, 774 int32_t srcLength, 775 uint32_t options) const; 776 777 /** 778 * Compare two strings case-insensitively using full case folding. 779 * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). 780 * 781 * @param start The start offset in this string at which the compare operation begins. 782 * @param limit The offset after the last code unit from this string to compare. 783 * @param srcText Another string to compare this one to. 784 * @param srcStart The start offset in that string at which the compare operation begins. 785 * @param srcLimit The offset after the last code unit from that string to compare. 786 * @param options A bit set of options: 787 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 788 * Comparison in code unit order with default case folding. 789 * 790 * - U_COMPARE_CODE_POINT_ORDER 791 * Set to choose code point order instead of code unit order 792 * (see u_strCompare for details). 793 * 794 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 795 * 796 * @return A negative, zero, or positive integer indicating the comparison result. 797 * @stable ICU 2.0 798 */ 799 inline int8_t caseCompareBetween(int32_t start, 800 int32_t limit, 801 const UnicodeString& srcText, 802 int32_t srcStart, 803 int32_t srcLimit, 804 uint32_t options) const; 805 806 /** 807 * Determine if this starts with the characters in <TT>text</TT> 808 * @param text The text to match. 809 * @return TRUE if this starts with the characters in <TT>text</TT>, 810 * FALSE otherwise 811 * @stable ICU 2.0 812 */ 813 inline UBool startsWith(const UnicodeString& text) const; 814 815 /** 816 * Determine if this starts with the characters in <TT>srcText</TT> 817 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 818 * @param srcText The text to match. 
819 * @param srcStart the offset into <TT>srcText</TT> to start matching 820 * @param srcLength the number of characters in <TT>srcText</TT> to match 821 * @return TRUE if this starts with the characters in <TT>text</TT>, 822 * FALSE otherwise 823 * @stable ICU 2.0 824 */ 825 inline UBool startsWith(const UnicodeString& srcText, 826 int32_t srcStart, 827 int32_t srcLength) const; 828 829 /** 830 * Determine if this starts with the characters in <TT>srcChars</TT> 831 * @param srcChars The characters to match. 832 * @param srcLength the number of characters in <TT>srcChars</TT> 833 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 834 * FALSE otherwise 835 * @stable ICU 2.0 836 */ 837 inline UBool startsWith(const UChar *srcChars, 838 int32_t srcLength) const; 839 840 /** 841 * Determine if this starts with the characters in <TT>srcChars</TT> 842 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 843 * @param srcChars The characters to match. 844 * @param srcStart the offset into <TT>srcChars</TT> to start matching 845 * @param srcLength the number of characters in <TT>srcChars</TT> to match 846 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 847 * @stable ICU 2.0 848 */ 849 inline UBool startsWith(const UChar *srcChars, 850 int32_t srcStart, 851 int32_t srcLength) const; 852 853 /** 854 * Determine if this ends with the characters in <TT>text</TT> 855 * @param text The text to match. 856 * @return TRUE if this ends with the characters in <TT>text</TT>, 857 * FALSE otherwise 858 * @stable ICU 2.0 859 */ 860 inline UBool endsWith(const UnicodeString& text) const; 861 862 /** 863 * Determine if this ends with the characters in <TT>srcText</TT> 864 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 865 * @param srcText The text to match.
866 * @param srcStart the offset into <TT>srcText</TT> to start matching 867 * @param srcLength the number of characters in <TT>srcText</TT> to match 868 * @return TRUE if this ends with the characters in <TT>text</TT>, 869 * FALSE otherwise 870 * @stable ICU 2.0 871 */ 872 inline UBool endsWith(const UnicodeString& srcText, 873 int32_t srcStart, 874 int32_t srcLength) const; 875 876 /** 877 * Determine if this ends with the characters in <TT>srcChars</TT> 878 * @param srcChars The characters to match. 879 * @param srcLength the number of characters in <TT>srcChars</TT> 880 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 881 * FALSE otherwise 882 * @stable ICU 2.0 883 */ 884 inline UBool endsWith(const UChar *srcChars, 885 int32_t srcLength) const; 886 887 /** 888 * Determine if this ends with the characters in <TT>srcChars</TT> 889 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 890 * @param srcChars The characters to match. 891 * @param srcStart the offset into <TT>srcChars</TT> to start matching 892 * @param srcLength the number of characters in <TT>srcChars</TT> to match 893 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 894 * FALSE otherwise 895 * @stable ICU 2.0 896 */ 897 inline UBool endsWith(const UChar *srcChars, 898 int32_t srcStart, 899 int32_t srcLength) const; 900 901 902 /* Searching - bitwise only */ 903 904 /** 905 * Locate in this the first occurrence of the characters in <TT>text</TT>, 906 * using bitwise comparison. 907 * @param text The text to search for. 908 * @return The offset into this of the start of <TT>text</TT>, 909 * or -1 if not found. 910 * @stable ICU 2.0 911 */ 912 inline int32_t indexOf(const UnicodeString& text) const; 913 914 /** 915 * Locate in this the first occurrence of the characters in <TT>text</TT> 916 * starting at offset <TT>start</TT>, using bitwise comparison. 917 * @param text The text to search for.
918 * @param start The offset at which searching will start. 919 * @return The offset into this of the start of <TT>text</TT>, 920 * or -1 if not found. 921 * @stable ICU 2.0 922 */ 923 inline int32_t indexOf(const UnicodeString& text, 924 int32_t start) const; 925 926 /** 927 * Locate in this the first occurrence in the range 928 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 929 * in <TT>text</TT>, using bitwise comparison. 930 * @param text The text to search for. 931 * @param start The offset at which searching will start. 932 * @param length The number of characters to search 933 * @return The offset into this of the start of <TT>text</TT>, 934 * or -1 if not found. 935 * @stable ICU 2.0 936 */ 937 inline int32_t indexOf(const UnicodeString& text, 938 int32_t start, 939 int32_t length) const; 940 941 /** 942 * Locate in this the first occurrence in the range 943 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 944 * in <TT>srcText</TT> in the range 945 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 946 * using bitwise comparison. 947 * @param srcText The text to search for. 948 * @param srcStart the offset into <TT>srcText</TT> at which 949 * to start matching 950 * @param srcLength the number of characters in <TT>srcText</TT> to match 951 * @param start the offset into this at which to start matching 952 * @param length the number of characters in this to search 953 * @return The offset into this of the start of <TT>text</TT>, 954 * or -1 if not found. 955 * @stable ICU 2.0 956 */ 957 inline int32_t indexOf(const UnicodeString& srcText, 958 int32_t srcStart, 959 int32_t srcLength, 960 int32_t start, 961 int32_t length) const; 962 963 /** 964 * Locate in this the first occurrence of the characters in 965 * <TT>srcChars</TT> 966 * starting at offset <TT>start</TT>, using bitwise comparison. 967 * @param srcChars The text to search for. 
968 * @param srcLength the number of characters in <TT>srcChars</TT> to match 969 * @param start the offset into this at which to start matching 970 * @return The offset into this of the start of <TT>text</TT>, 971 * or -1 if not found. 972 * @stable ICU 2.0 973 */ 974 inline int32_t indexOf(const UChar *srcChars, 975 int32_t srcLength, 976 int32_t start) const; 977 978 /** 979 * Locate in this the first occurrence in the range 980 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 981 * in <TT>srcChars</TT>, using bitwise comparison. 982 * @param srcChars The text to search for. 983 * @param srcLength the number of characters in <TT>srcChars</TT> 984 * @param start The offset at which searching will start. 985 * @param length The number of characters to search 986 * @return The offset into this of the start of <TT>srcChars</TT>, 987 * or -1 if not found. 988 * @stable ICU 2.0 989 */ 990 inline int32_t indexOf(const UChar *srcChars, 991 int32_t srcLength, 992 int32_t start, 993 int32_t length) const; 994 995 /** 996 * Locate in this the first occurrence in the range 997 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 998 * in <TT>srcChars</TT> in the range 999 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1000 * using bitwise comparison. 1001 * @param srcChars The text to search for. 1002 * @param srcStart the offset into <TT>srcChars</TT> at which 1003 * to start matching 1004 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1005 * @param start the offset into this at which to start matching 1006 * @param length the number of characters in this to search 1007 * @return The offset into this of the start of <TT>text</TT>, 1008 * or -1 if not found. 
1009 * @stable ICU 2.0 1010 */ 1011 int32_t indexOf(const UChar *srcChars, 1012 int32_t srcStart, 1013 int32_t srcLength, 1014 int32_t start, 1015 int32_t length) const; 1016 1017 /** 1018 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1019 * using bitwise comparison. 1020 * @param c The code unit to search for. 1021 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1022 * @stable ICU 2.0 1023 */ 1024 inline int32_t indexOf(UChar c) const; 1025 1026 /** 1027 * Locate in this the first occurrence of the code point <TT>c</TT>, 1028 * using bitwise comparison. 1029 * 1030 * @param c The code point to search for. 1031 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1032 * @stable ICU 2.0 1033 */ 1034 inline int32_t indexOf(UChar32 c) const; 1035 1036 /** 1037 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1038 * starting at offset <TT>start</TT>, using bitwise comparison. 1039 * @param c The code unit to search for. 1040 * @param start The offset at which searching will start. 1041 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1042 * @stable ICU 2.0 1043 */ 1044 inline int32_t indexOf(UChar c, 1045 int32_t start) const; 1046 1047 /** 1048 * Locate in this the first occurrence of the code point <TT>c</TT> 1049 * starting at offset <TT>start</TT>, using bitwise comparison. 1050 * 1051 * @param c The code point to search for. 1052 * @param start The offset at which searching will start. 1053 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1054 * @stable ICU 2.0 1055 */ 1056 inline int32_t indexOf(UChar32 c, 1057 int32_t start) const; 1058 1059 /** 1060 * Locate in this the first occurrence of the BMP code point <code>c</code> 1061 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1062 * using bitwise comparison. 1063 * @param c The code unit to search for. 
1064 * @param start the offset into this at which to start matching 1065 * @param length the number of characters in this to search 1066 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1067 * @stable ICU 2.0 1068 */ 1069 inline int32_t indexOf(UChar c, 1070 int32_t start, 1071 int32_t length) const; 1072 1073 /** 1074 * Locate in this the first occurrence of the code point <TT>c</TT> 1075 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1076 * using bitwise comparison. 1077 * 1078 * @param c The code point to search for. 1079 * @param start the offset into this at which to start matching 1080 * @param length the number of characters in this to search 1081 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1082 * @stable ICU 2.0 1083 */ 1084 inline int32_t indexOf(UChar32 c, 1085 int32_t start, 1086 int32_t length) const; 1087 1088 /** 1089 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1090 * using bitwise comparison. 1091 * @param text The text to search for. 1092 * @return The offset into this of the start of <TT>text</TT>, 1093 * or -1 if not found. 1094 * @stable ICU 2.0 1095 */ 1096 inline int32_t lastIndexOf(const UnicodeString& text) const; 1097 1098 /** 1099 * Locate in this the last occurrence of the characters in <TT>text</TT> 1100 * starting at offset <TT>start</TT>, using bitwise comparison. 1101 * @param text The text to search for. 1102 * @param start The offset at which searching will start. 1103 * @return The offset into this of the start of <TT>text</TT>, 1104 * or -1 if not found. 1105 * @stable ICU 2.0 1106 */ 1107 inline int32_t lastIndexOf(const UnicodeString& text, 1108 int32_t start) const; 1109 1110 /** 1111 * Locate in this the last occurrence in the range 1112 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1113 * in <TT>text</TT>, using bitwise comparison. 1114 * @param text The text to search for. 
1115 * @param start The offset at which searching will start. 1116 * @param length The number of characters to search 1117 * @return The offset into this of the start of <TT>text</TT>, 1118 * or -1 if not found. 1119 * @stable ICU 2.0 1120 */ 1121 inline int32_t lastIndexOf(const UnicodeString& text, 1122 int32_t start, 1123 int32_t length) const; 1124 1125 /** 1126 * Locate in this the last occurrence in the range 1127 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1128 * in <TT>srcText</TT> in the range 1129 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1130 * using bitwise comparison. 1131 * @param srcText The text to search for. 1132 * @param srcStart the offset into <TT>srcText</TT> at which 1133 * to start matching 1134 * @param srcLength the number of characters in <TT>srcText</TT> to match 1135 * @param start the offset into this at which to start matching 1136 * @param length the number of characters in this to search 1137 * @return The offset into this of the start of <TT>text</TT>, 1138 * or -1 if not found. 1139 * @stable ICU 2.0 1140 */ 1141 inline int32_t lastIndexOf(const UnicodeString& srcText, 1142 int32_t srcStart, 1143 int32_t srcLength, 1144 int32_t start, 1145 int32_t length) const; 1146 1147 /** 1148 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1149 * starting at offset <TT>start</TT>, using bitwise comparison. 1150 * @param srcChars The text to search for. 1151 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1152 * @param start the offset into this at which to start matching 1153 * @return The offset into this of the start of <TT>text</TT>, 1154 * or -1 if not found. 
1155 * @stable ICU 2.0 1156 */ 1157 inline int32_t lastIndexOf(const UChar *srcChars, 1158 int32_t srcLength, 1159 int32_t start) const; 1160 1161 /** 1162 * Locate in this the last occurrence in the range 1163 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1164 * in <TT>srcChars</TT>, using bitwise comparison. 1165 * @param srcChars The text to search for. 1166 * @param srcLength the number of characters in <TT>srcChars</TT> 1167 * @param start The offset at which searching will start. 1168 * @param length The number of characters to search 1169 * @return The offset into this of the start of <TT>srcChars</TT>, 1170 * or -1 if not found. 1171 * @stable ICU 2.0 1172 */ 1173 inline int32_t lastIndexOf(const UChar *srcChars, 1174 int32_t srcLength, 1175 int32_t start, 1176 int32_t length) const; 1177 1178 /** 1179 * Locate in this the last occurrence in the range 1180 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1181 * in <TT>srcChars</TT> in the range 1182 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1183 * using bitwise comparison. 1184 * @param srcChars The text to search for. 1185 * @param srcStart the offset into <TT>srcChars</TT> at which 1186 * to start matching 1187 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1188 * @param start the offset into this at which to start matching 1189 * @param length the number of characters in this to search 1190 * @return The offset into this of the start of <TT>text</TT>, 1191 * or -1 if not found. 1192 * @stable ICU 2.0 1193 */ 1194 int32_t lastIndexOf(const UChar *srcChars, 1195 int32_t srcStart, 1196 int32_t srcLength, 1197 int32_t start, 1198 int32_t length) const; 1199 1200 /** 1201 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1202 * using bitwise comparison. 1203 * @param c The code unit to search for. 1204 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1205 * @stable ICU 2.0 1206 */ 1207 inline int32_t lastIndexOf(UChar c) const; 1208 1209 /** 1210 * Locate in this the last occurrence of the code point <TT>c</TT>, 1211 * using bitwise comparison. 1212 * 1213 * @param c The code point to search for. 1214 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1215 * @stable ICU 2.0 1216 */ 1217 inline int32_t lastIndexOf(UChar32 c) const; 1218 1219 /** 1220 * Locate in this the last occurrence of the BMP code point <code>c</code> 1221 * starting at offset <TT>start</TT>, using bitwise comparison. 1222 * @param c The code unit to search for. 1223 * @param start The offset at which searching will start. 1224 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1225 * @stable ICU 2.0 1226 */ 1227 inline int32_t lastIndexOf(UChar c, 1228 int32_t start) const; 1229 1230 /** 1231 * Locate in this the last occurrence of the code point <TT>c</TT> 1232 * starting at offset <TT>start</TT>, using bitwise comparison. 1233 * 1234 * @param c The code point to search for. 1235 * @param start The offset at which searching will start. 1236 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1237 * @stable ICU 2.0 1238 */ 1239 inline int32_t lastIndexOf(UChar32 c, 1240 int32_t start) const; 1241 1242 /** 1243 * Locate in this the last occurrence of the BMP code point <code>c</code> 1244 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1245 * using bitwise comparison. 1246 * @param c The code unit to search for. 1247 * @param start the offset into this at which to start matching 1248 * @param length the number of characters in this to search 1249 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1250 * @stable ICU 2.0 1251 */ 1252 inline int32_t lastIndexOf(UChar c, 1253 int32_t start, 1254 int32_t length) const; 1255 1256 /** 1257 * Locate in this the last occurrence of the code point <TT>c</TT> 1258 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1259 * using bitwise comparison. 1260 * 1261 * @param c The code point to search for. 1262 * @param start the offset into this at which to start matching 1263 * @param length the number of characters in this to search 1264 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1265 * @stable ICU 2.0 1266 */ 1267 inline int32_t lastIndexOf(UChar32 c, 1268 int32_t start, 1269 int32_t length) const; 1270 1271 1272 /* Character access */ 1273 1274 /** 1275 * Return the code unit at offset <tt>offset</tt>. 1276 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1277 * @param offset a valid offset into the text 1278 * @return the code unit at offset <tt>offset</tt> 1279 * or 0xffff if the offset is not valid for this string 1280 * @stable ICU 2.0 1281 */ 1282 inline UChar charAt(int32_t offset) const; 1283 1284 /** 1285 * Return the code unit at offset <tt>offset</tt>. 1286 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1287 * @param offset a valid offset into the text 1288 * @return the code unit at offset <tt>offset</tt> 1289 * @stable ICU 2.0 1290 */ 1291 inline UChar operator[] (int32_t offset) const; 1292 1293 /** 1294 * Return the code point that contains the code unit 1295 * at offset <tt>offset</tt>. 1296 * If the offset is not valid (0..length()-1) then U+ffff is returned. 
1297 * @param offset a valid offset into the text 1298 * that indicates the text offset of any of the code units 1299 * that will be assembled into a code point (21-bit value) and returned 1300 * @return the code point of text at <tt>offset</tt> 1301 * or 0xffff if the offset is not valid for this string 1302 * @stable ICU 2.0 1303 */ 1304 UChar32 char32At(int32_t offset) const; 1305 1306 /** 1307 * Adjust a random-access offset so that 1308 * it points to the beginning of a Unicode character. 1309 * The offset that is passed in points to 1310 * any code unit of a code point, 1311 * while the returned offset will point to the first code unit 1312 * of the same code point. 1313 * In UTF-16, if the input offset points to a second surrogate 1314 * of a surrogate pair, then the returned offset will point 1315 * to the first surrogate. 1316 * @param offset a valid offset into one code point of the text 1317 * @return offset of the first code unit of the same code point 1318 * @see U16_SET_CP_START 1319 * @stable ICU 2.0 1320 */ 1321 int32_t getChar32Start(int32_t offset) const; 1322 1323 /** 1324 * Adjust a random-access offset so that 1325 * it points behind a Unicode character. 1326 * The offset that is passed in points behind 1327 * any code unit of a code point, 1328 * while the returned offset will point behind the last code unit 1329 * of the same code point. 1330 * In UTF-16, if the input offset points behind the first surrogate 1331 * (i.e., to the second surrogate) 1332 * of a surrogate pair, then the returned offset will point 1333 * behind the second surrogate (i.e., to the first code unit after the pair). 1334 * @param offset a valid offset after any code unit of a code point of the text 1335 * @return offset of the first code unit after the same code point 1336 * @see U16_SET_CP_LIMIT 1337 * @stable ICU 2.0 1338 */ 1339 int32_t getChar32Limit(int32_t offset) const; 1340 1341 /** 1342 * Move the code unit index along the string by delta code points.
1343 * Interpret the input index as a code unit-based offset into the string, 1344 * move the index forward or backward by delta code points, and 1345 * return the resulting index. 1346 * The input index should point to the first code unit of a code point, 1347 * if there is more than one. 1348 * 1349 * Both input and output indexes are code unit-based as for all 1350 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1351 * If delta<0 then the index is moved backward (toward the start of the string). 1352 * If delta>0 then the index is moved forward (toward the end of the string). 1353 * 1354 * This behaves like CharacterIterator::move32(delta, kCurrent). 1355 * 1356 * Behavior for out-of-bounds indexes: 1357 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1358 * if the input index<0 then it is pinned to 0; 1359 * if it is index>length() then it is pinned to length(). 1360 * Afterwards, the index is moved by <code>delta</code> code points 1361 * forward or backward, 1362 * but no further backward than to 0 and no further forward than to length(). 1363 * The resulting index return value will be in between 0 and length(), inclusively. 
1364 * 1365 * Examples: 1366 * <pre> 1367 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1368 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1369 * 1370 * // initial index: position of U+10000 1371 * int32_t index=1; 1372 * 1373 * // the following examples will all result in index==4, position of U+10ffff 1374 * 1375 * // skip 2 code points from some position in the string 1376 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1377 * 1378 * // go to the 3rd code point from the start of s (0-based) 1379 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1380 * 1381 * // go to the next-to-last code point of s 1382 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1383 * </pre> 1384 * 1385 * @param index input code unit index 1386 * @param delta (signed) code point count to move the index forward or backward 1387 * in the string 1388 * @return the resulting code unit index 1389 * @stable ICU 2.0 1390 */ 1391 int32_t moveIndex32(int32_t index, int32_t delta) const; 1392 1393 /* Substring extraction */ 1394 1395 /** 1396 * Copy the characters in the range 1397 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1398 * beginning at <tt>dstStart</tt>. 1399 * If the string aliases to <code>dst</code> itself as an external buffer, 1400 * then extract() will not copy the contents. 1401 * 1402 * @param start offset of first character which will be copied into the array 1403 * @param length the number of characters to extract 1404 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1405 * must be at least (<tt>dstStart + length</tt>). 1406 * @param dstStart the offset in <TT>dst</TT> where the first character 1407 * will be extracted 1408 * @stable ICU 2.0 1409 */ 1410 inline void extract(int32_t start, 1411 int32_t length, 1412 UChar *dst, 1413 int32_t dstStart = 0) const; 1414 1415 /** 1416 * Copy the contents of the string into dest. 
1417 * This is a convenience function that 1418 * checks if there is enough space in dest, 1419 * extracts the entire string if possible, 1420 * and NUL-terminates dest if possible. 1421 * 1422 * If the string fits into dest but cannot be NUL-terminated 1423 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1424 * If the string itself does not fit into dest 1425 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1426 * 1427 * If the string aliases to <code>dest</code> itself as an external buffer, 1428 * then extract() will not copy the contents. 1429 * 1430 * @param dest Destination string buffer. 1431 * @param destCapacity Number of UChars available at dest. 1432 * @param errorCode ICU error code. 1433 * @return length() 1434 * @stable ICU 2.0 1435 */ 1436 int32_t 1437 extract(UChar *dest, int32_t destCapacity, 1438 UErrorCode &errorCode) const; 1439 1440 /** 1441 * Copy the characters in the range 1442 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1443 * <tt>target</tt>. 1444 * @param start offset of first character which will be copied 1445 * @param length the number of characters to extract 1446 * @param target UnicodeString into which to copy characters. 1447 * The result is delivered through <tt>target</tt>; nothing is returned. 1448 * @stable ICU 2.0 1449 */ 1450 inline void extract(int32_t start, 1451 int32_t length, 1452 UnicodeString& target) const; 1453 1454 /** 1455 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1456 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1457 * @param start offset of first character which will be copied into the array 1458 * @param limit offset immediately following the last character to be copied 1459 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1460 * must be at least (<tt>dstStart + (limit - start)</tt>).
1461 * @param dstStart the offset in <TT>dst</TT> where the first character 1462 * will be extracted 1463 * @stable ICU 2.0 1464 */ 1465 inline void extractBetween(int32_t start, 1466 int32_t limit, 1467 UChar *dst, 1468 int32_t dstStart = 0) const; 1469 1470 /** 1471 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1472 * into the UnicodeString <tt>target</tt>. Replaceable API. 1473 * @param start offset of first character which will be copied 1474 * @param limit offset immediately following the last character to be copied 1475 * @param target UnicodeString into which to copy characters. 1476 * The result is delivered through <tt>target</tt>; nothing is returned. 1477 * @stable ICU 2.0 1478 */ 1479 virtual void extractBetween(int32_t start, 1480 int32_t limit, 1481 UnicodeString& target) const; 1482 1483 /** 1484 * Copy the characters in the range 1485 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. 1486 * All characters must be invariant (see utypes.h). 1487 * Use US_INV as the last, signature-distinguishing parameter. 1488 * 1489 * This function does not write any more than <code>targetCapacity</code> 1490 * characters but returns the length of the entire output string 1491 * so that one can allocate a larger buffer and call the function again 1492 * if necessary. 1493 * The output string is NUL-terminated if possible. 1494 * 1495 * @param start offset of first character which will be copied 1496 * @param startLength the number of characters to extract 1497 * @param target the target buffer for extraction, can be NULL 1498 * if targetCapacity is 0 1499 * @param targetCapacity the length of the target buffer 1500 * @param inv Signature-distinguishing parameter, use US_INV.
1501 * @return the output string length, not including the terminating NUL 1502 * @stable ICU 3.2 1503 */ 1504 int32_t extract(int32_t start, 1505 int32_t startLength, 1506 char *target, 1507 int32_t targetCapacity, 1508 enum EInvariant inv) const; 1509 1510#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1511 1512 /** 1513 * Copy the characters in the range 1514 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1515 * in the platform's default codepage. 1516 * This function does not write any more than <code>targetLength</code> 1517 * characters but returns the length of the entire output string 1518 * so that one can allocate a larger buffer and call the function again 1519 * if necessary. 1520 * The output string is NUL-terminated if possible. 1521 * 1522 * @param start offset of first character which will be copied 1523 * @param startLength the number of characters to extract 1524 * @param target the target buffer for extraction 1525 * @param targetLength the length of the target buffer 1526 * If <TT>target</TT> is NULL, then the number of bytes required for 1527 * <TT>target</TT> is returned. 1528 * @return the output string length, not including the terminating NUL 1529 * @stable ICU 2.0 1530 */ 1531 int32_t extract(int32_t start, 1532 int32_t startLength, 1533 char *target, 1534 uint32_t targetLength) const; 1535 1536#endif 1537 1538#if !UCONFIG_NO_CONVERSION 1539 1540 /** 1541 * Copy the characters in the range 1542 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1543 * in a specified codepage. 1544 * The output string is NUL-terminated. 1545 * 1546 * Recommendation: For invariant-character strings use 1547 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1548 * because it avoids object code dependencies of UnicodeString on 1549 * the conversion code.
1550 * 1551 * @param start offset of first character which will be copied 1552 * @param startLength the number of characters to extract 1553 * @param target the target buffer for extraction 1554 * @param codepage the desired codepage for the characters. 0 has 1555 * the special meaning of the default codepage 1556 * If <code>codepage</code> is an empty string (<code>""</code>), 1557 * then a simple conversion is performed on the codepage-invariant 1558 * subset ("invariant characters") of the platform encoding. See utypes.h. 1559 * If <TT>target</TT> is NULL, then the number of bytes required for 1560 * <TT>target</TT> is returned. It is assumed that the target is big enough 1561 * to fit all of the characters. 1562 * @return the output string length, not including the terminating NUL 1563 * @stable ICU 2.0 1564 */ 1565 inline int32_t extract(int32_t start, 1566 int32_t startLength, 1567 char *target, 1568 const char *codepage = 0) const; 1569 1570 /** 1571 * Copy the characters in the range 1572 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1573 * in a specified codepage. 1574 * This function does not write any more than <code>targetLength</code> 1575 * characters but returns the length of the entire output string 1576 * so that one can allocate a larger buffer and call the function again 1577 * if necessary. 1578 * The output string is NUL-terminated if possible. 1579 * 1580 * Recommendation: For invariant-character strings use 1581 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1582 * because it avoids object code dependencies of UnicodeString on 1583 * the conversion code. 
1584 * 1585 * @param start offset of first character which will be copied 1586 * @param startLength the number of characters to extract 1587 * @param target the target buffer for extraction 1588 * @param targetLength the length of the target buffer 1589 * @param codepage the desired codepage for the characters. 0 has 1590 * the special meaning of the default codepage 1591 * If <code>codepage</code> is an empty string (<code>""</code>), 1592 * then a simple conversion is performed on the codepage-invariant 1593 * subset ("invariant characters") of the platform encoding. See utypes.h. 1594 * If <TT>target</TT> is NULL, then the number of bytes required for 1595 * <TT>target</TT> is returned. 1596 * @return the output string length, not including the terminating NUL 1597 * @stable ICU 2.0 1598 */ 1599 int32_t extract(int32_t start, 1600 int32_t startLength, 1601 char *target, 1602 uint32_t targetLength, 1603 const char *codepage) const; 1604 1605 /** 1606 * Convert the UnicodeString into a codepage string using an existing UConverter. 1607 * The output string is NUL-terminated if possible. 1608 * 1609 * This function avoids the overhead of opening and closing a converter if 1610 * multiple strings are extracted. 
1611 * 1612 * @param dest destination string buffer, can be NULL if destCapacity==0 1613 * @param destCapacity the number of chars available at dest 1614 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1615 * or NULL for the default converter 1616 * @param errorCode normal ICU error code 1617 * @return the length of the output string, not counting the terminating NUL; 1618 * if the length is greater than destCapacity, then the string will not fit 1619 * and a buffer of the indicated length would need to be passed in 1620 * @stable ICU 2.0 1621 */ 1622 int32_t extract(char *dest, int32_t destCapacity, 1623 UConverter *cnv, 1624 UErrorCode &errorCode) const; 1625 1626#endif 1627 1628 /** 1629 * Create a temporary substring for the specified range. 1630 * Unlike the substring constructor and setTo() functions, 1631 * the object returned here will be a read-only alias (using getBuffer()) 1632 * rather than copying the text. 1633 * As a result, this substring operation is much faster but requires 1634 * that the original string not be modified or deleted during the lifetime 1635 * of the returned substring object. 1636 * @param start offset of the first character visible in the substring 1637 * @param length length of the substring 1638 * @return a read-only alias UnicodeString object for the substring 1639 * @stable ICU 4.4 1640 */ 1641 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1642 1643 /** 1644 * Create a temporary substring for the specified range. 1645 * Same as tempSubString(start, length) except that the substring range 1646 * is specified as a (start, limit) pair (with an exclusive limit index) 1647 * rather than a (start, length) pair. 
1648 * @param start offset of the first character visible in the substring 1649 * @param limit offset immediately following the last character visible in the substring 1650 * @return a read-only alias UnicodeString object for the substring 1651 * @stable ICU 4.4 1652 */ 1653 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1654 1655 /** 1656 * Convert the UnicodeString to UTF-8 and write the result 1657 * to a ByteSink. This is called by toUTF8String(). 1658 * Unpaired surrogates are replaced with U+FFFD. 1659 * Calls u_strToUTF8WithSub(). 1660 * 1661 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1662 * sink.Flush() is called at the end. 1663 * @stable ICU 4.2 1664 * @see toUTF8String 1665 */ 1666 void toUTF8(ByteSink &sink) const; 1667 1668#if U_HAVE_STD_STRING 1669 1670 /** 1671 * Convert the UnicodeString to UTF-8 and append the result 1672 * to a standard string. 1673 * Unpaired surrogates are replaced with U+FFFD. 1674 * Calls toUTF8(). 1675 * 1676 * @param result A standard string (or a compatible object) 1677 * to which the UTF-8 version of the string is appended. 1678 * @return The string object. 1679 * @stable ICU 4.2 1680 * @see toUTF8 1681 */ 1682 template<typename StringClass> 1683 StringClass &toUTF8String(StringClass &result) const { 1684 StringByteSink<StringClass> sbs(&result); 1685 toUTF8(sbs); 1686 return result; 1687 } 1688 1689#endif 1690 1691 /** 1692 * Convert the UnicodeString to UTF-32. 1693 * Unpaired surrogates are replaced with U+FFFD. 1694 * Calls u_strToUTF32WithSub(). 1695 * 1696 * @param utf32 destination string buffer, can be NULL if capacity==0 1697 * @param capacity the number of UChar32s available at utf32 1698 * @param errorCode Standard ICU error code. Its input value must 1699 * pass the U_SUCCESS() test, or else the function returns 1700 * immediately. Check for U_FAILURE() on output or use with 1701 * function chaining. (See User Guide for details.) 
1702 * @return The length of the UTF-32 string. 1703 * @see fromUTF32 1704 * @stable ICU 4.2 1705 */ 1706 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1707 1708 /* Length operations */ 1709 1710 /** 1711 * Return the length of the UnicodeString object. 1712 * The length is the number of UChar code units are in the UnicodeString. 1713 * If you want the number of code points, please use countChar32(). 1714 * @return the length of the UnicodeString object 1715 * @see countChar32 1716 * @stable ICU 2.0 1717 */ 1718 inline int32_t length(void) const; 1719 1720 /** 1721 * Count Unicode code points in the length UChar code units of the string. 1722 * A code point may occupy either one or two UChar code units. 1723 * Counting code points involves reading all code units. 1724 * 1725 * This functions is basically the inverse of moveIndex32(). 1726 * 1727 * @param start the index of the first code unit to check 1728 * @param length the number of UChar code units to check 1729 * @return the number of code points in the specified code units 1730 * @see length 1731 * @stable ICU 2.0 1732 */ 1733 int32_t 1734 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1735 1736 /** 1737 * Check if the length UChar code units of the string 1738 * contain more Unicode code points than a certain number. 1739 * This is more efficient than counting all code points in this part of the string 1740 * and comparing that number with a threshold. 1741 * This function may not need to scan the string at all if the length 1742 * falls within a certain range, and 1743 * never needs to count more than 'number+1' code points. 1744 * Logically equivalent to (countChar32(start, length)>number). 1745 * A Unicode code point may occupy either one or two UChar code units. 
1746 * 1747 * @param start the index of the first code unit to check (0 for the entire string) 1748 * @param length the number of UChar code units to check 1749 * (use INT32_MAX for the entire string; remember that start/length 1750 * values are pinned) 1751 * @param number The number of code points in the (sub)string is compared against 1752 * the 'number' parameter. 1753 * @return Boolean value for whether the string contains more Unicode code points 1754 * than 'number'. Same as (u_countChar32(s, length)>number). 1755 * @see countChar32 1756 * @see u_strHasMoreChar32Than 1757 * @stable ICU 2.4 1758 */ 1759 UBool 1760 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1761 1762 /** 1763 * Determine if this string is empty. 1764 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1765 * @stable ICU 2.0 1766 */ 1767 inline UBool isEmpty(void) const; 1768 1769 /** 1770 * Return the capacity of the internal buffer of the UnicodeString object. 1771 * This is useful together with the getBuffer functions. 1772 * See there for details. 1773 * 1774 * @return the number of UChars available in the internal buffer 1775 * @see getBuffer 1776 * @stable ICU 2.0 1777 */ 1778 inline int32_t getCapacity(void) const; 1779 1780 /* Other operations */ 1781 1782 /** 1783 * Generate a hash code for this object. 1784 * @return The hash code of this UnicodeString. 1785 * @stable ICU 2.0 1786 */ 1787 inline int32_t hashCode(void) const; 1788 1789 /** 1790 * Determine if this object contains a valid string. 1791 * A bogus string has no value. It is different from an empty string, 1792 * although in both cases isEmpty() returns TRUE and length() returns 0. 1793 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1794 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1795 * length() returns 0. 
1796 * 1797 * @return TRUE if the string is valid, FALSE otherwise 1798 * @see setToBogus() 1799 * @stable ICU 2.0 1800 */ 1801 inline UBool isBogus(void) const; 1802 1803 1804 //======================================== 1805 // Write operations 1806 //======================================== 1807 1808 /* Assignment operations */ 1809 1810 /** 1811 * Assignment operator. Replace the characters in this UnicodeString 1812 * with the characters from <TT>srcText</TT>. 1813 * @param srcText The text containing the characters to replace 1814 * @return a reference to this 1815 * @stable ICU 2.0 1816 */ 1817 UnicodeString &operator=(const UnicodeString &srcText); 1818 1819 /** 1820 * Almost the same as the assignment operator. 1821 * Replace the characters in this UnicodeString 1822 * with the characters from <code>srcText</code>. 1823 * 1824 * This function works the same as the assignment operator 1825 * for all strings except for ones that are readonly aliases. 1826 * 1827 * Starting with ICU 2.4, the assignment operator and the copy constructor 1828 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1829 * This function implements the old, more efficient but less safe behavior 1830 * of making this string also a readonly alias to the same buffer. 1831 * 1832 * The fastCopyFrom function must be used only if it is known that the lifetime of 1833 * this UnicodeString does not exceed the lifetime of the aliased buffer 1834 * including its contents, for example for strings from resource bundles 1835 * or aliases to string constants. 1836 * 1837 * @param src The text containing the characters to replace. 1838 * @return a reference to this 1839 * @stable ICU 2.4 1840 */ 1841 UnicodeString &fastCopyFrom(const UnicodeString &src); 1842 1843 /** 1844 * Assignment operator. Replace the characters in this UnicodeString 1845 * with the code unit <TT>ch</TT>. 
1846 * @param ch the code unit to replace 1847 * @return a reference to this 1848 * @stable ICU 2.0 1849 */ 1850 inline UnicodeString& operator= (UChar ch); 1851 1852 /** 1853 * Assignment operator. Replace the characters in this UnicodeString 1854 * with the code point <TT>ch</TT>. 1855 * @param ch the code point to replace 1856 * @return a reference to this 1857 * @stable ICU 2.0 1858 */ 1859 inline UnicodeString& operator= (UChar32 ch); 1860 1861 /** 1862 * Set the text in the UnicodeString object to the characters 1863 * in <TT>srcText</TT> in the range 1864 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1865 * <TT>srcText</TT> is not modified. 1866 * @param srcText the source for the new characters 1867 * @param srcStart the offset into <TT>srcText</TT> where new characters 1868 * will be obtained 1869 * @return a reference to this 1870 * @stable ICU 2.2 1871 */ 1872 inline UnicodeString& setTo(const UnicodeString& srcText, 1873 int32_t srcStart); 1874 1875 /** 1876 * Set the text in the UnicodeString object to the characters 1877 * in <TT>srcText</TT> in the range 1878 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1879 * <TT>srcText</TT> is not modified. 1880 * @param srcText the source for the new characters 1881 * @param srcStart the offset into <TT>srcText</TT> where new characters 1882 * will be obtained 1883 * @param srcLength the number of characters in <TT>srcText</TT> in the 1884 * replace string. 1885 * @return a reference to this 1886 * @stable ICU 2.0 1887 */ 1888 inline UnicodeString& setTo(const UnicodeString& srcText, 1889 int32_t srcStart, 1890 int32_t srcLength); 1891 1892 /** 1893 * Set the text in the UnicodeString object to the characters in 1894 * <TT>srcText</TT>. 1895 * <TT>srcText</TT> is not modified. 
1896 * @param srcText the source for the new characters 1897 * @return a reference to this 1898 * @stable ICU 2.0 1899 */ 1900 inline UnicodeString& setTo(const UnicodeString& srcText); 1901 1902 /** 1903 * Set the characters in the UnicodeString object to the characters 1904 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 1905 * @param srcChars the source for the new characters 1906 * @param srcLength the number of Unicode characters in srcChars. 1907 * @return a reference to this 1908 * @stable ICU 2.0 1909 */ 1910 inline UnicodeString& setTo(const UChar *srcChars, 1911 int32_t srcLength); 1912 1913 /** 1914 * Set the characters in the UnicodeString object to the code unit 1915 * <TT>srcChar</TT>. 1916 * @param srcChar the code unit which becomes the UnicodeString's character 1917 * content 1918 * @return a reference to this 1919 * @stable ICU 2.0 1920 */ 1921 UnicodeString& setTo(UChar srcChar); 1922 1923 /** 1924 * Set the characters in the UnicodeString object to the code point 1925 * <TT>srcChar</TT>. 1926 * @param srcChar the code point which becomes the UnicodeString's character 1927 * content 1928 * @return a reference to this 1929 * @stable ICU 2.0 1930 */ 1931 UnicodeString& setTo(UChar32 srcChar); 1932 1933 /** 1934 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. 1935 * The text will be used for the UnicodeString object, but 1936 * it will not be released when the UnicodeString is destroyed. 1937 * This has copy-on-write semantics: 1938 * When the string is modified, then the buffer is first copied into 1939 * newly allocated memory. 1940 * The aliased buffer is never modified. 1941 * 1942 * In an assignment to another UnicodeString, when using the copy constructor 1943 * or the assignment operator, the text will be copied. 1944 * When using fastCopyFrom(), the text will be aliased again, 1945 * so that both strings then alias the same readonly-text. 
1946 * 1947 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 1948 * This must be true if <code>textLength==-1</code>. 1949 * @param text The characters to alias for the UnicodeString. 1950 * @param textLength The number of Unicode characters in <code>text</code> to alias. 1951 * If -1, then this constructor will determine the length 1952 * by calling <code>u_strlen()</code>. 1953 * @return a reference to this 1954 * @stable ICU 2.0 1955 */ 1956 UnicodeString &setTo(UBool isTerminated, 1957 const UChar *text, 1958 int32_t textLength); 1959 1960 /** 1961 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 1962 * The text will be used for the UnicodeString object, but 1963 * it will not be released when the UnicodeString is destroyed. 1964 * This has write-through semantics: 1965 * For as long as the capacity of the buffer is sufficient, write operations 1966 * will directly affect the buffer. When more capacity is necessary, then 1967 * a new buffer will be allocated and the contents copied as with regularly 1968 * constructed strings. 1969 * In an assignment to another UnicodeString, the buffer will be copied. 1970 * The extract(UChar *dst) function detects whether the dst pointer is the same 1971 * as the string buffer itself and will in this case not copy the contents. 1972 * 1973 * @param buffer The characters to alias for the UnicodeString. 1974 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 1975 * @param buffCapacity The size of <code>buffer</code> in UChars. 1976 * @return a reference to this 1977 * @stable ICU 2.0 1978 */ 1979 UnicodeString &setTo(UChar *buffer, 1980 int32_t buffLength, 1981 int32_t buffCapacity); 1982 1983 /** 1984 * Make this UnicodeString object invalid. 1985 * The string will test TRUE with isBogus(). 1986 * 1987 * A bogus string has no value. It is different from an empty string. 
1988 * It can be used to indicate that no string value is available. 1989 * getBuffer() and getTerminatedBuffer() return NULL, and 1990 * length() returns 0. 1991 * 1992 * This utility function is used throughout the UnicodeString 1993 * implementation to indicate that a UnicodeString operation failed, 1994 * and may be used in other functions, 1995 * especially but not exclusively when such functions do not 1996 * take a UErrorCode for simplicity. 1997 * 1998 * The following methods, and no others, will clear a string object's bogus flag: 1999 * - remove() 2000 * - remove(0, INT32_MAX) 2001 * - truncate(0) 2002 * - operator=() (assignment operator) 2003 * - setTo(...) 2004 * 2005 * The simplest ways to turn a bogus string into an empty one 2006 * is to use the remove() function. 2007 * Examples for other functions that are equivalent to "set to empty string": 2008 * \code 2009 * if(s.isBogus()) { 2010 * s.remove(); // set to an empty string (remove all), or 2011 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2012 * s.truncate(0); // set to an empty string (complete truncation), or 2013 * s=UnicodeString(); // assign an empty string, or 2014 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2015 * static const UChar nul=0; 2016 * s.setTo(&nul, 0); // set to an empty C Unicode string 2017 * } 2018 * \endcode 2019 * 2020 * @see isBogus() 2021 * @stable ICU 2.0 2022 */ 2023 void setToBogus(); 2024 2025 /** 2026 * Set the character at the specified offset to the specified character. 2027 * @param offset A valid offset into the text of the character to set 2028 * @param ch The new character 2029 * @return A reference to this 2030 * @stable ICU 2.0 2031 */ 2032 UnicodeString& setCharAt(int32_t offset, 2033 UChar ch); 2034 2035 2036 /* Append operations */ 2037 2038 /** 2039 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2040 * object. 
2041 * @param ch the code unit to be appended 2042 * @return a reference to this 2043 * @stable ICU 2.0 2044 */ 2045 inline UnicodeString& operator+= (UChar ch); 2046 2047 /** 2048 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2049 * object. 2050 * @param ch the code point to be appended 2051 * @return a reference to this 2052 * @stable ICU 2.0 2053 */ 2054 inline UnicodeString& operator+= (UChar32 ch); 2055 2056 /** 2057 * Append operator. Append the characters in <TT>srcText</TT> to the 2058 * UnicodeString object. <TT>srcText</TT> is not modified. 2059 * @param srcText the source for the new characters 2060 * @return a reference to this 2061 * @stable ICU 2.0 2062 */ 2063 inline UnicodeString& operator+= (const UnicodeString& srcText); 2064 2065 /** 2066 * Append the characters 2067 * in <TT>srcText</TT> in the range 2068 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2069 * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> 2070 * is not modified. 2071 * @param srcText the source for the new characters 2072 * @param srcStart the offset into <TT>srcText</TT> where new characters 2073 * will be obtained 2074 * @param srcLength the number of characters in <TT>srcText</TT> in 2075 * the append string 2076 * @return a reference to this 2077 * @stable ICU 2.0 2078 */ 2079 inline UnicodeString& append(const UnicodeString& srcText, 2080 int32_t srcStart, 2081 int32_t srcLength); 2082 2083 /** 2084 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2085 * <TT>srcText</TT> is not modified. 2086 * @param srcText the source for the new characters 2087 * @return a reference to this 2088 * @stable ICU 2.0 2089 */ 2090 inline UnicodeString& append(const UnicodeString& srcText); 2091 2092 /** 2093 * Append the characters in <TT>srcChars</TT> in the range 2094 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2095 * object at offset 2096 * <TT>start</TT>. 
<TT>srcChars</TT> is not modified. 2097 * @param srcChars the source for the new characters 2098 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2099 * will be obtained 2100 * @param srcLength the number of characters in <TT>srcChars</TT> in 2101 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2102 * @return a reference to this 2103 * @stable ICU 2.0 2104 */ 2105 inline UnicodeString& append(const UChar *srcChars, 2106 int32_t srcStart, 2107 int32_t srcLength); 2108 2109 /** 2110 * Append the characters in <TT>srcChars</TT> to the UnicodeString object 2111 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2112 * @param srcChars the source for the new characters 2113 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2114 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2115 * @return a reference to this 2116 * @stable ICU 2.0 2117 */ 2118 inline UnicodeString& append(const UChar *srcChars, 2119 int32_t srcLength); 2120 2121 /** 2122 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2123 * @param srcChar the code unit to append 2124 * @return a reference to this 2125 * @stable ICU 2.0 2126 */ 2127 inline UnicodeString& append(UChar srcChar); 2128 2129 /** 2130 * Append the code point <TT>srcChar</TT> to the UnicodeString object. 2131 * @param srcChar the code point to append 2132 * @return a reference to this 2133 * @stable ICU 2.0 2134 */ 2135 UnicodeString& append(UChar32 srcChar); 2136 2137 2138 /* Insert operations */ 2139 2140 /** 2141 * Insert the characters in <TT>srcText</TT> in the range 2142 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2143 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 
2144 * @param start the offset where the insertion begins 2145 * @param srcText the source for the new characters 2146 * @param srcStart the offset into <TT>srcText</TT> where new characters 2147 * will be obtained 2148 * @param srcLength the number of characters in <TT>srcText</TT> in 2149 * the insert string 2150 * @return a reference to this 2151 * @stable ICU 2.0 2152 */ 2153 inline UnicodeString& insert(int32_t start, 2154 const UnicodeString& srcText, 2155 int32_t srcStart, 2156 int32_t srcLength); 2157 2158 /** 2159 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2160 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2161 * @param start the offset where the insertion begins 2162 * @param srcText the source for the new characters 2163 * @return a reference to this 2164 * @stable ICU 2.0 2165 */ 2166 inline UnicodeString& insert(int32_t start, 2167 const UnicodeString& srcText); 2168 2169 /** 2170 * Insert the characters in <TT>srcChars</TT> in the range 2171 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2172 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2173 * @param start the offset at which the insertion begins 2174 * @param srcChars the source for the new characters 2175 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2176 * will be obtained 2177 * @param srcLength the number of characters in <TT>srcChars</TT> 2178 * in the insert string 2179 * @return a reference to this 2180 * @stable ICU 2.0 2181 */ 2182 inline UnicodeString& insert(int32_t start, 2183 const UChar *srcChars, 2184 int32_t srcStart, 2185 int32_t srcLength); 2186 2187 /** 2188 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2189 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
2190 * @param start the offset where the insertion begins 2191 * @param srcChars the source for the new characters 2192 * @param srcLength the number of Unicode characters in srcChars. 2193 * @return a reference to this 2194 * @stable ICU 2.0 2195 */ 2196 inline UnicodeString& insert(int32_t start, 2197 const UChar *srcChars, 2198 int32_t srcLength); 2199 2200 /** 2201 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2202 * offset <TT>start</TT>. 2203 * @param start the offset at which the insertion occurs 2204 * @param srcChar the code unit to insert 2205 * @return a reference to this 2206 * @stable ICU 2.0 2207 */ 2208 inline UnicodeString& insert(int32_t start, 2209 UChar srcChar); 2210 2211 /** 2212 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2213 * offset <TT>start</TT>. 2214 * @param start the offset at which the insertion occurs 2215 * @param srcChar the code point to insert 2216 * @return a reference to this 2217 * @stable ICU 2.0 2218 */ 2219 inline UnicodeString& insert(int32_t start, 2220 UChar32 srcChar); 2221 2222 2223 /* Replace operations */ 2224 2225 /** 2226 * Replace the characters in the range 2227 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2228 * <TT>srcText</TT> in the range 2229 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2230 * <TT>srcText</TT> is not modified. 2231 * @param start the offset at which the replace operation begins 2232 * @param length the number of characters to replace. The character at 2233 * <TT>start + length</TT> is not modified. 
2234 * @param srcText the source for the new characters 2235 * @param srcStart the offset into <TT>srcText</TT> where new characters 2236 * will be obtained 2237 * @param srcLength the number of characters in <TT>srcText</TT> in 2238 * the replace string 2239 * @return a reference to this 2240 * @stable ICU 2.0 2241 */ 2242 UnicodeString& replace(int32_t start, 2243 int32_t length, 2244 const UnicodeString& srcText, 2245 int32_t srcStart, 2246 int32_t srcLength); 2247 2248 /** 2249 * Replace the characters in the range 2250 * [<TT>start</TT>, <TT>start + length</TT>) 2251 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2252 * not modified. 2253 * @param start the offset at which the replace operation begins 2254 * @param length the number of characters to replace. The character at 2255 * <TT>start + length</TT> is not modified. 2256 * @param srcText the source for the new characters 2257 * @return a reference to this 2258 * @stable ICU 2.0 2259 */ 2260 UnicodeString& replace(int32_t start, 2261 int32_t length, 2262 const UnicodeString& srcText); 2263 2264 /** 2265 * Replace the characters in the range 2266 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2267 * <TT>srcChars</TT> in the range 2268 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2269 * is not modified. 2270 * @param start the offset at which the replace operation begins 2271 * @param length the number of characters to replace. The character at 2272 * <TT>start + length</TT> is not modified. 
2273 * @param srcChars the source for the new characters 2274 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2275 * will be obtained 2276 * @param srcLength the number of characters in <TT>srcChars</TT> 2277 * in the replace string 2278 * @return a reference to this 2279 * @stable ICU 2.0 2280 */ 2281 UnicodeString& replace(int32_t start, 2282 int32_t length, 2283 const UChar *srcChars, 2284 int32_t srcStart, 2285 int32_t srcLength); 2286 2287 /** 2288 * Replace the characters in the range 2289 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2290 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2291 * @param start the offset at which the replace operation begins 2292 * @param length number of characters to replace. The character at 2293 * <TT>start + length</TT> is not modified. 2294 * @param srcChars the source for the new characters 2295 * @param srcLength the number of Unicode characters in srcChars 2296 * @return a reference to this 2297 * @stable ICU 2.0 2298 */ 2299 inline UnicodeString& replace(int32_t start, 2300 int32_t length, 2301 const UChar *srcChars, 2302 int32_t srcLength); 2303 2304 /** 2305 * Replace the characters in the range 2306 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2307 * <TT>srcChar</TT>. 2308 * @param start the offset at which the replace operation begins 2309 * @param length the number of characters to replace. The character at 2310 * <TT>start + length</TT> is not modified. 2311 * @param srcChar the new code unit 2312 * @return a reference to this 2313 * @stable ICU 2.0 2314 */ 2315 inline UnicodeString& replace(int32_t start, 2316 int32_t length, 2317 UChar srcChar); 2318 2319 /** 2320 * Replace the characters in the range 2321 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2322 * <TT>srcChar</TT>. 2323 * @param start the offset at which the replace operation begins 2324 * @param length the number of characters to replace. 
The character at 2325 * <TT>start + length</TT> is not modified. 2326 * @param srcChar the new code point 2327 * @return a reference to this 2328 * @stable ICU 2.0 2329 */ 2330 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2331 2332 /** 2333 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2334 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2335 * @param start the offset at which the replace operation begins 2336 * @param limit the offset immediately following the replace range 2337 * @param srcText the source for the new characters 2338 * @return a reference to this 2339 * @stable ICU 2.0 2340 */ 2341 inline UnicodeString& replaceBetween(int32_t start, 2342 int32_t limit, 2343 const UnicodeString& srcText); 2344 2345 /** 2346 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2347 * with the characters in <TT>srcText</TT> in the range 2348 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. 2349 * @param start the offset at which the replace operation begins 2350 * @param limit the offset immediately following the replace range 2351 * @param srcText the source for the new characters 2352 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2353 * will be obtained 2354 * @param srcLimit the offset immediately following the range to copy 2355 * in <TT>srcText</TT> 2356 * @return a reference to this 2357 * @stable ICU 2.0 2358 */ 2359 inline UnicodeString& replaceBetween(int32_t start, 2360 int32_t limit, 2361 const UnicodeString& srcText, 2362 int32_t srcStart, 2363 int32_t srcLimit); 2364 2365 /** 2366 * Replace a substring of this object with the given text. 2367 * @param start the beginning index, inclusive; <code>0 <= start 2368 * <= limit</code>. 2369 * @param limit the ending index, exclusive; <code>start <= limit 2370 * <= length()</code>. 
2371 * @param text the text to replace characters <code>start</code> 2372 * to <code>limit - 1</code> 2373 * @stable ICU 2.0 2374 */ 2375 virtual void handleReplaceBetween(int32_t start, 2376 int32_t limit, 2377 const UnicodeString& text); 2378 2379 /** 2380 * Replaceable API 2381 * @return TRUE if it has MetaData 2382 * @stable ICU 2.4 2383 */ 2384 virtual UBool hasMetaData() const; 2385 2386 /** 2387 * Copy a substring of this object, retaining attribute (out-of-band) 2388 * information. This method is used to duplicate or reorder substrings. 2389 * The destination index must not overlap the source range. 2390 * 2391 * @param start the beginning index, inclusive; <code>0 <= start <= 2392 * limit</code>. 2393 * @param limit the ending index, exclusive; <code>start <= limit <= 2394 * length()</code>. 2395 * @param dest the destination index. The characters from 2396 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2397 * Implementations of this method may assume that <code>dest <= start || 2398 * dest >= limit</code>. 2399 * @stable ICU 2.0 2400 */ 2401 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2402 2403 /* Search and replace operations */ 2404 2405 /** 2406 * Replace all occurrences of characters in oldText with the characters 2407 * in newText 2408 * @param oldText the text containing the search text 2409 * @param newText the text containing the replacement text 2410 * @return a reference to this 2411 * @stable ICU 2.0 2412 */ 2413 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2414 const UnicodeString& newText); 2415 2416 /** 2417 * Replace all occurrences of characters in oldText with characters 2418 * in newText 2419 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2420 * @param start the start of the range in which replace will be performed 2421 * @param length the length of the range in which replace will be performed 2422 * @param oldText the text containing the search text 2423 * @param newText the text containing the replacement text 2424 * @return a reference to this 2425 * @stable ICU 2.0 2426 */ 2427 inline UnicodeString& findAndReplace(int32_t start, 2428 int32_t length, 2429 const UnicodeString& oldText, 2430 const UnicodeString& newText); 2431 2432 /** 2433 * Replace all occurrences of characters in oldText in the range 2434 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2435 * in newText in the range 2436 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2437 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2438 * @param start the start of the range in which replace will be performed 2439 * @param length the length of the range in which replace will be performed 2440 * @param oldText the text containing the search text 2441 * @param oldStart the start of the search range in <TT>oldText</TT> 2442 * @param oldLength the length of the search range in <TT>oldText</TT> 2443 * @param newText the text containing the replacement text 2444 * @param newStart the start of the replacement range in <TT>newText</TT> 2445 * @param newLength the length of the replacement range in <TT>newText</TT> 2446 * @return a reference to this 2447 * @stable ICU 2.0 2448 */ 2449 UnicodeString& findAndReplace(int32_t start, 2450 int32_t length, 2451 const UnicodeString& oldText, 2452 int32_t oldStart, 2453 int32_t oldLength, 2454 const UnicodeString& newText, 2455 int32_t newStart, 2456 int32_t newLength); 2457 2458 2459 /* Remove operations */ 2460 2461 /** 2462 * Remove all characters from the UnicodeString object.
2463 * @return a reference to this 2464 * @stable ICU 2.0 2465 */ 2466 inline UnicodeString& remove(void); 2467 2468 /** 2469 * Remove the characters in the range 2470 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2471 * @param start the offset of the first character to remove 2472 * @param length the number of characters to remove 2473 * @return a reference to this 2474 * @stable ICU 2.0 2475 */ 2476 inline UnicodeString& remove(int32_t start, 2477 int32_t length = (int32_t)INT32_MAX); 2478 2479 /** 2480 * Remove the characters in the range 2481 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. 2482 * @param start the offset of the first character to remove 2483 * @param limit the offset immediately following the range to remove 2484 * @return a reference to this 2485 * @stable ICU 2.0 2486 */ 2487 inline UnicodeString& removeBetween(int32_t start, 2488 int32_t limit = (int32_t)INT32_MAX); 2489 2490 /** 2491 * Retain only the characters in the range 2492 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2493 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2494 * @param start the offset of the first character to retain 2495 * @param limit the offset immediately following the range to retain 2496 * @return a reference to this 2497 * @stable ICU 4.4 2498 */ 2499 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2500 2501 /* Length operations */ 2502 2503 /** 2504 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2505 * If the length of this UnicodeString is less than targetLength, 2506 * targetLength - length() copies of padChar will be added to the 2507 * beginning of this UnicodeString. 2508 * @param targetLength the desired length of the string 2509 * @param padChar the character to use for padding. Defaults to 2510 * space (U+0020) 2511 * @return TRUE if the text was padded, FALSE otherwise.
2512 * @stable ICU 2.0 2513 */ 2514 UBool padLeading(int32_t targetLength, 2515 UChar padChar = 0x0020); 2516 2517 /** 2518 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2519 * If the length of this UnicodeString is less than targetLength, 2520 * targetLength - length() copies of padChar will be added to the 2521 * end of this UnicodeString. 2522 * @param targetLength the desired length of the string 2523 * @param padChar the character to use for padding. Defaults to 2524 * space (U+0020) 2525 * @return TRUE if the text was padded, FALSE otherwise. 2526 * @stable ICU 2.0 2527 */ 2528 UBool padTrailing(int32_t targetLength, 2529 UChar padChar = 0x0020); 2530 2531 /** 2532 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2533 * @param targetLength the desired length of this UnicodeString. 2534 * @return TRUE if the text was truncated, FALSE otherwise 2535 * @stable ICU 2.0 2536 */ 2537 inline UBool truncate(int32_t targetLength); 2538 2539 /** 2540 * Trims leading and trailing whitespace from this UnicodeString. 2541 * @return a reference to this 2542 * @stable ICU 2.0 2543 */ 2544 UnicodeString& trim(void); 2545 2546 2547 /* Miscellaneous operations */ 2548 2549 /** 2550 * Reverse this UnicodeString in place. 2551 * @return a reference to this 2552 * @stable ICU 2.0 2553 */ 2554 inline UnicodeString& reverse(void); 2555 2556 /** 2557 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2558 * this UnicodeString. 2559 * @param start the start of the range to reverse 2560 * @param length the number of characters to reverse 2561 * @return a reference to this 2562 * @stable ICU 2.0 2563 */ 2564 inline UnicodeString& reverse(int32_t start, 2565 int32_t length); 2566 2567 /** 2568 * Convert the characters in this to UPPER CASE following the conventions of 2569 * the default locale. 2570 * @return A reference to this.
2571 * @stable ICU 2.0 2572 */ 2573 UnicodeString& toUpper(void); 2574 2575 /** 2576 * Convert the characters in this to UPPER CASE following the conventions of 2577 * a specific locale. 2578 * @param locale The locale containing the conventions to use. 2579 * @return A reference to this. 2580 * @stable ICU 2.0 2581 */ 2582 UnicodeString& toUpper(const Locale& locale); 2583 2584 /** 2585 * Convert the characters in this to lower case following the conventions of 2586 * the default locale. 2587 * @return A reference to this. 2588 * @stable ICU 2.0 2589 */ 2590 UnicodeString& toLower(void); 2591 2592 /** 2593 * Convert the characters in this to lower case following the conventions of 2594 * a specific locale. 2595 * @param locale The locale containing the conventions to use. 2596 * @return A reference to this. 2597 * @stable ICU 2.0 2598 */ 2599 UnicodeString& toLower(const Locale& locale); 2600 2601#if !UCONFIG_NO_BREAK_ITERATION 2602 2603 /** 2604 * Titlecase this string, convenience function using the default locale. 2605 * 2606 * Casing is locale-dependent and context-sensitive. 2607 * Titlecasing uses a break iterator to find the first characters of words 2608 * that are to be titlecased. It titlecases those characters and lowercases 2609 * all others. 2610 * 2611 * The titlecase break iterator can be provided to customize for arbitrary 2612 * styles, using rules and dictionaries beyond the standard iterators. 2613 * It may be more efficient to always provide an iterator to avoid 2614 * opening and closing one for each string. 2615 * The standard titlecase iterator for the root locale implements the 2616 * algorithm of Unicode TR 21. 2617 * 2618 * This function uses only the setText(), first() and next() methods of the 2619 * provided break iterator. 2620 * 2621 * @param titleIter A break iterator to find the first characters of words 2622 * that are to be titlecased. 2623 * If none is provided (0), then a standard titlecase 2624 * break iterator is opened. 
2625 * Otherwise the provided iterator is set to the string's text. 2626 * @return A reference to this. 2627 * @stable ICU 2.1 2628 */ 2629 UnicodeString &toTitle(BreakIterator *titleIter); 2630 2631 /** 2632 * Titlecase this string. 2633 * 2634 * Casing is locale-dependent and context-sensitive. 2635 * Titlecasing uses a break iterator to find the first characters of words 2636 * that are to be titlecased. It titlecases those characters and lowercases 2637 * all others. 2638 * 2639 * The titlecase break iterator can be provided to customize for arbitrary 2640 * styles, using rules and dictionaries beyond the standard iterators. 2641 * It may be more efficient to always provide an iterator to avoid 2642 * opening and closing one for each string. 2643 * The standard titlecase iterator for the root locale implements the 2644 * algorithm of Unicode TR 21. 2645 * 2646 * This function uses only the setText(), first() and next() methods of the 2647 * provided break iterator. 2648 * 2649 * @param titleIter A break iterator to find the first characters of words 2650 * that are to be titlecased. 2651 * If none is provided (0), then a standard titlecase 2652 * break iterator is opened. 2653 * Otherwise the provided iterator is set to the string's text. 2654 * @param locale The locale to consider. 2655 * @return A reference to this. 2656 * @stable ICU 2.1 2657 */ 2658 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2659 2660 /** 2661 * Titlecase this string, with options. 2662 * 2663 * Casing is locale-dependent and context-sensitive. 2664 * Titlecasing uses a break iterator to find the first characters of words 2665 * that are to be titlecased. It titlecases those characters and lowercases 2666 * all others. (This can be modified with options.) 2667 * 2668 * The titlecase break iterator can be provided to customize for arbitrary 2669 * styles, using rules and dictionaries beyond the standard iterators. 
2670 * It may be more efficient to always provide an iterator to avoid 2671 * opening and closing one for each string. 2672 * The standard titlecase iterator for the root locale implements the 2673 * algorithm of Unicode TR 21. 2674 * 2675 * This function uses only the setText(), first() and next() methods of the 2676 * provided break iterator. 2677 * 2678 * @param titleIter A break iterator to find the first characters of words 2679 * that are to be titlecased. 2680 * If none is provided (0), then a standard titlecase 2681 * break iterator is opened. 2682 * Otherwise the provided iterator is set to the string's text. 2683 * @param locale The locale to consider. 2684 * @param options Options bit set, see ucasemap_open(). 2685 * @return A reference to this. 2686 * @see U_TITLECASE_NO_LOWERCASE 2687 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 2688 * @see ucasemap_open 2689 * @stable ICU 3.8 2690 */ 2691 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2692 2693#endif 2694 2695 /** 2696 * Case-folds the characters in this string. 2697 * 2698 * Case-folding is locale-independent and not context-sensitive, 2699 * but there is an option for whether to include or exclude mappings for dotted I 2700 * and dotless i that are marked with 'T' in CaseFolding.txt. 2701 * 2702 * The result may be longer or shorter than the original. 2703 * 2704 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2705 * @return A reference to this. 2706 * @stable ICU 2.0 2707 */ 2708 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2709 2710 //======================================== 2711 // Access to the internal buffer 2712 //======================================== 2713 2714 /** 2715 * Get a read/write pointer to the internal buffer. 2716 * The buffer is guaranteed to be large enough for at least minCapacity UChars, 2717 * writable, and is still owned by the UnicodeString object. 
2718 * Calls to getBuffer(minCapacity) must not be nested, and 2719 * must be matched with calls to releaseBuffer(newLength). 2720 * If the string buffer was read-only or shared, 2721 * then it will be reallocated and copied. 2722 * 2723 * An attempted nested call will return 0, and will not further modify the 2724 * state of the UnicodeString object. 2725 * It also returns 0 if the string is bogus. 2726 * 2727 * The actual capacity of the string buffer may be larger than minCapacity. 2728 * getCapacity() returns the actual capacity. 2729 * For many operations, the full capacity should be used to avoid reallocations. 2730 * 2731 * While the buffer is "open" between getBuffer(minCapacity) 2732 * and releaseBuffer(newLength), the following applies: 2733 * - The string length is set to 0. 2734 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2735 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2736 * - You can read from and write to the returned buffer. 2737 * - The previous string contents will still be in the buffer; 2738 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2739 * If the length() was greater than minCapacity, then any contents after minCapacity 2740 * may be lost. 2741 * The buffer contents is not NUL-terminated by getBuffer(). 2742 * If length()<getCapacity() then you can terminate it by writing a NUL 2743 * at index length(). 2744 * - You must call releaseBuffer(newLength) before and in order to 2745 * return to normal UnicodeString operation. 
2746 * 2747 * @param minCapacity the minimum number of UChars that are to be available 2748 * in the buffer, starting at the returned pointer; 2749 * default to the current string capacity if minCapacity==-1 2750 * @return a writable pointer to the internal string buffer, 2751 * or 0 if an error occurs (nested calls, out of memory) 2752 * 2753 * @see releaseBuffer 2754 * @see getTerminatedBuffer() 2755 * @stable ICU 2.0 2756 */ 2757 UChar *getBuffer(int32_t minCapacity); 2758 2759 /** 2760 * Release a read/write buffer on a UnicodeString object with an 2761 * "open" getBuffer(minCapacity). 2762 * This function must be called in a matched pair with getBuffer(minCapacity). 2763 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2764 * 2765 * It will set the string length to newLength, at most to the current capacity. 2766 * If newLength==-1 then it will set the length according to the 2767 * first NUL in the buffer, or to the capacity if there is no NUL. 2768 * 2769 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2770 * 2771 * @param newLength the new length of the UnicodeString object; 2772 * defaults to the current capacity if newLength is greater than that; 2773 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2774 * the current capacity of the string 2775 * 2776 * @see getBuffer(int32_t minCapacity) 2777 * @stable ICU 2.0 2778 */ 2779 void releaseBuffer(int32_t newLength=-1); 2780 2781 /** 2782 * Get a read-only pointer to the internal buffer. 2783 * This can be called at any time on a valid UnicodeString. 2784 * 2785 * It returns 0 if the string is bogus, or 2786 * during an "open" getBuffer(minCapacity). 2787 * 2788 * It can be called as many times as desired. 2789 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2790 * at which time the pointer is semantically invalidated and must not be used any more. 
2791 * 2792 * The capacity of the buffer can be determined with getCapacity(). 2793 * The part after length() may or may not be initialized and valid, 2794 * depending on the history of the UnicodeString object. 2795 * 2796 * The buffer contents is (probably) not NUL-terminated. 2797 * You can check if it is with 2798 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2799 * (See getTerminatedBuffer().) 2800 * 2801 * The buffer may reside in read-only memory. Its contents must not 2802 * be modified. 2803 * 2804 * @return a read-only pointer to the internal string buffer, 2805 * or 0 if the string is empty or bogus 2806 * 2807 * @see getBuffer(int32_t minCapacity) 2808 * @see getTerminatedBuffer() 2809 * @stable ICU 2.0 2810 */ 2811 inline const UChar *getBuffer() const; 2812 2813 /** 2814 * Get a read-only pointer to the internal buffer, 2815 * making sure that it is NUL-terminated. 2816 * This can be called at any time on a valid UnicodeString. 2817 * 2818 * It returns 0 if the string is bogus, or 2819 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2820 * be NUL-terminated (because memory allocation failed). 2821 * 2822 * It can be called as many times as desired. 2823 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2824 * at which time the pointer is semantically invalidated and must not be used any more. 2825 * 2826 * The capacity of the buffer can be determined with getCapacity(). 2827 * The part after length()+1 may or may not be initialized and valid, 2828 * depending on the history of the UnicodeString object. 2829 * 2830 * The buffer contents is guaranteed to be NUL-terminated. 2831 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2832 * is written. 2833 * For this reason, this function is not const, unlike getBuffer(). 2834 * Note that a UnicodeString may also contain NUL characters as part of its contents. 
2835 * 2836 * The buffer may reside in read-only memory. Its contents must not 2837 * be modified. 2838 * 2839 * @return a read-only pointer to the internal string buffer, 2840 * or 0 if the string is empty or bogus 2841 * 2842 * @see getBuffer(int32_t minCapacity) 2843 * @see getBuffer() 2844 * @stable ICU 2.2 2845 */ 2846 inline const UChar *getTerminatedBuffer(); 2847 2848 //======================================== 2849 // Constructors 2850 //======================================== 2851 2852 /** Construct an empty UnicodeString. 2853 * @stable ICU 2.0 2854 */ 2855 UnicodeString(); 2856 2857 /** 2858 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars 2859 * @param capacity the number of UChars this UnicodeString should hold 2860 * before a resize is necessary; if count is greater than 0 and count 2861 * code points c take up more space than capacity, then capacity is adjusted 2862 * accordingly. 2863 * @param c is used to initially fill the string 2864 * @param count specifies how many code points c are to be written in the 2865 * string 2866 * @stable ICU 2.0 2867 */ 2868 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2869 2870 /** 2871 * Single UChar (code unit) constructor. 2872 * 2873 * It is recommended to mark this constructor "explicit" by 2874 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2875 * on the compiler command line or similar. 2876 * @param ch the character to place in the UnicodeString 2877 * @stable ICU 2.0 2878 */ 2879 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 2880 2881 /** 2882 * Single UChar32 (code point) constructor. 2883 * 2884 * It is recommended to mark this constructor "explicit" by 2885 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2886 * on the compiler command line or similar. 2887 * @param ch the character to place in the UnicodeString 2888 * @stable ICU 2.0 2889 */ 2890 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2891 2892 /** 2893 * UChar* constructor. 
2894 * 2895 * It is recommended to mark this constructor "explicit" by 2896 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2897 * on the compiler command line or similar. 2898 * @param text The characters to place in the UnicodeString. <TT>text</TT> 2899 * must be NULL (U+0000) terminated. 2900 * @stable ICU 2.0 2901 */ 2902 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 2903 2904 /** 2905 * UChar* constructor. 2906 * @param text The characters to place in the UnicodeString. 2907 * @param textLength The number of Unicode characters in <TT>text</TT> 2908 * to copy. 2909 * @stable ICU 2.0 2910 */ 2911 UnicodeString(const UChar *text, 2912 int32_t textLength); 2913 2914 /** 2915 * Readonly-aliasing UChar* constructor. 2916 * The text will be used for the UnicodeString object, but 2917 * it will not be released when the UnicodeString is destroyed. 2918 * This has copy-on-write semantics: 2919 * When the string is modified, then the buffer is first copied into 2920 * newly allocated memory. 2921 * The aliased buffer is never modified. 2922 * 2923 * In an assignment to another UnicodeString, when using the copy constructor 2924 * or the assignment operator, the text will be copied. 2925 * When using fastCopyFrom(), the text will be aliased again, 2926 * so that both strings then alias the same readonly-text. 2927 * 2928 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2929 * This must be true if <code>textLength==-1</code>. 2930 * @param text The characters to alias for the UnicodeString. 2931 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2932 * If -1, then this constructor will determine the length 2933 * by calling <code>u_strlen()</code>. 2934 * @stable ICU 2.0 2935 */ 2936 UnicodeString(UBool isTerminated, 2937 const UChar *text, 2938 int32_t textLength); 2939 2940 /** 2941 * Writable-aliasing UChar* constructor. 
2942 * The text will be used for the UnicodeString object, but 2943 * it will not be released when the UnicodeString is destroyed. 2944 * This has write-through semantics: 2945 * For as long as the capacity of the buffer is sufficient, write operations 2946 * will directly affect the buffer. When more capacity is necessary, then 2947 * a new buffer will be allocated and the contents copied as with regularly 2948 * constructed strings. 2949 * In an assignment to another UnicodeString, the buffer will be copied. 2950 * The extract(UChar *dst) function detects whether the dst pointer is the same 2951 * as the string buffer itself and will in this case not copy the contents. 2952 * 2953 * @param buffer The characters to alias for the UnicodeString. 2954 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2955 * @param buffCapacity The size of <code>buffer</code> in UChars. 2956 * @stable ICU 2.0 2957 */ 2958 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 2959 2960#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 2961 2962 /** 2963 * char* constructor. 2964 * Uses the default converter (and thus depends on the ICU conversion code) 2965 * unless U_CHARSET_IS_UTF8 is set to 1. 2966 * 2967 * For ASCII (really "invariant character") strings it is more efficient to use 2968 * the constructor that takes a US_INV (for its enum EInvariant). 2969 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 2970 * UNICODE_STRING_SIMPLE. 2971 * 2972 * It is recommended to mark this constructor "explicit" by 2973 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2974 * on the compiler command line or similar. 2975 * @param codepageData an array of bytes, null-terminated, 2976 * in the platform's default codepage. 
2977 * @stable ICU 2.0 2978 * @see UNICODE_STRING 2979 * @see UNICODE_STRING_SIMPLE 2980 */ 2981 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 2982 2983 /** 2984 * char* constructor. 2985 * Uses the default converter (and thus depends on the ICU conversion code) 2986 * unless U_CHARSET_IS_UTF8 is set to 1. 2987 * @param codepageData an array of bytes in the platform's default codepage. 2988 * @param dataLength The number of bytes in <TT>codepageData</TT>. 2989 * @stable ICU 2.0 2990 */ 2991 UnicodeString(const char *codepageData, int32_t dataLength); 2992 2993#endif 2994 2995#if !UCONFIG_NO_CONVERSION 2996 2997 /** 2998 * char* constructor. 2999 * @param codepageData an array of bytes, null-terminated 3000 * @param codepage the encoding of <TT>codepageData</TT>. The special 3001 * value 0 for <TT>codepage</TT> indicates that the text is in the 3002 * platform's default codepage. 3003 * 3004 * If <code>codepage</code> is an empty string (<code>""</code>), 3005 * then a simple conversion is performed on the codepage-invariant 3006 * subset ("invariant characters") of the platform encoding. See utypes.h. 3007 * Recommendation: For invariant-character strings use the constructor 3008 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3009 * because it avoids object code dependencies of UnicodeString on 3010 * the conversion code. 3011 * 3012 * @stable ICU 2.0 3013 */ 3014 UnicodeString(const char *codepageData, const char *codepage); 3015 3016 /** 3017 * char* constructor. 3018 * @param codepageData an array of bytes. 3019 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3020 * @param codepage the encoding of <TT>codepageData</TT>. The special 3021 * value 0 for <TT>codepage</TT> indicates that the text is in the 3022 * platform's default codepage. 
3023 * If <code>codepage</code> is an empty string (<code>""</code>), 3024 * then a simple conversion is performed on the codepage-invariant 3025 * subset ("invariant characters") of the platform encoding. See utypes.h. 3026 * Recommendation: For invariant-character strings use the constructor 3027 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3028 * because it avoids object code dependencies of UnicodeString on 3029 * the conversion code. 3030 * 3031 * @stable ICU 2.0 3032 */ 3033 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3034 3035 /** 3036 * char * / UConverter constructor. 3037 * This constructor uses an existing UConverter object to 3038 * convert the codepage string to Unicode and construct a UnicodeString 3039 * from that. 3040 * 3041 * The converter is reset at first. 3042 * If the error code indicates a failure before this constructor is called, 3043 * or if an error occurs during conversion or construction, 3044 * then the string will be bogus. 3045 * 3046 * This function avoids the overhead of opening and closing a converter if 3047 * multiple strings are constructed. 3048 * 3049 * @param src input codepage string 3050 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3051 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3052 * can be NULL for the default converter 3053 * @param errorCode normal ICU error code 3054 * @stable ICU 2.0 3055 */ 3056 UnicodeString( 3057 const char *src, int32_t srcLength, 3058 UConverter *cnv, 3059 UErrorCode &errorCode); 3060 3061#endif 3062 3063 /** 3064 * Constructs a Unicode string from an invariant-character char * string. 3065 * About invariant characters see utypes.h. 3066 * This constructor has no runtime dependency on conversion code and is 3067 * therefore recommended over ones taking a charset name string 3068 * (where the empty string "" indicates invariant-character conversion).
3069 * 3070 * Use the macro US_INV as the third, signature-distinguishing parameter. 3071 * 3072 * For example: 3073 * \code 3074 * void fn(const char *s) { 3075 * UnicodeString ustr(s, -1, US_INV); 3076 * // use ustr ... 3077 * } 3078 * \endcode 3079 * 3080 * @param src String using only invariant characters. 3081 * @param length Length of src, or -1 if NUL-terminated. 3082 * @param inv Signature-distinguishing parameter, use US_INV. 3083 * 3084 * @see US_INV 3085 * @stable ICU 3.2 3086 */ 3087 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3088 3089 3090 /** 3091 * Copy constructor. 3092 * @param that The UnicodeString object to copy. 3093 * @stable ICU 2.0 3094 */ 3095 UnicodeString(const UnicodeString& that); 3096 3097 /** 3098 * 'Substring' constructor from tail of source string. 3099 * @param src The UnicodeString object to copy. 3100 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3101 * @stable ICU 2.2 3102 */ 3103 UnicodeString(const UnicodeString& src, int32_t srcStart); 3104 3105 /** 3106 * 'Substring' constructor from subrange of source string. 3107 * @param src The UnicodeString object to copy. 3108 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3109 * @param srcLength The number of characters from <tt>src</tt> to copy. 3110 * @stable ICU 2.2 3111 */ 3112 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3113 3114 /** 3115 * Clone this object, an instance of a subclass of Replaceable. 3116 * Clones can be used concurrently in multiple threads. 3117 * If a subclass does not implement clone(), or if an error occurs, 3118 * then NULL is returned. 3119 * The clone functions in all subclasses return a pointer to a Replaceable 3120 * because some compilers do not support covariant (same-as-this) 3121 * return types; cast to the appropriate subclass if necessary. 3122 * The caller must delete the clone.
3123 * 3124 * @return a clone of this object 3125 * 3126 * @see Replaceable::clone 3127 * @see getDynamicClassID 3128 * @stable ICU 2.6 3129 */ 3130 virtual Replaceable *clone() const; 3131 3132 /** Destructor. 3133 * @stable ICU 2.0 3134 */ 3135 virtual ~UnicodeString(); 3136 3137 /** 3138 * Create a UnicodeString from a UTF-8 string. 3139 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3140 * Calls u_strFromUTF8WithSub(). 3141 * 3142 * @param utf8 UTF-8 input string. 3143 * Note that a StringPiece can be implicitly constructed 3144 * from a std::string or a NUL-terminated const char * string. 3145 * @return A UnicodeString with equivalent UTF-16 contents. 3146 * @see toUTF8 3147 * @see toUTF8String 3148 * @stable ICU 4.2 3149 */ 3150 static UnicodeString fromUTF8(const StringPiece &utf8); 3151 3152 /** 3153 * Create a UnicodeString from a UTF-32 string. 3154 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3155 * Calls u_strFromUTF32WithSub(). 3156 * 3157 * @param utf32 UTF-32 input string. Must not be NULL. 3158 * @param length Length of the input string, or -1 if NUL-terminated. 3159 * @return A UnicodeString with equivalent UTF-16 contents. 3160 * @see toUTF32 3161 * @stable ICU 4.2 3162 */ 3163 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 3164 3165 /* Miscellaneous operations */ 3166 3167 /** 3168 * Unescape a string of characters and return a string containing 3169 * the result. The following escape sequences are recognized: 3170 * 3171 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 3172 * \\Uhhhhhhhh 8 hex digits 3173 * \\xhh 1-2 hex digits 3174 * \\ooo 1-3 octal digits; o in [0-7] 3175 * \\cX control-X; X is masked with 0x1F 3176 * 3177 * as well as the standard ANSI C escapes: 3178 * 3179 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 3180 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 3181 * \\" => U+0022, \\' => U+0027, \\? 
=> U+003F, \\\\ => U+005C 3182 * 3183 * Anything else following a backslash is generically escaped. For 3184 * example, "[a\\-z]" returns "[a-z]". 3185 * 3186 * If an escape sequence is ill-formed, this method returns an empty 3187 * string. An example of an ill-formed sequence is "\\u" followed by 3188 * fewer than 4 hex digits. 3189 * 3190 * This function is similar to u_unescape() but not identical to it. 3191 * The latter takes a source char*, so it does escape recognition 3192 * and also invariant conversion. 3193 * 3194 * @return a string with backslash escapes interpreted, or an 3195 * empty string on error. 3196 * @see UnicodeString#unescapeAt() 3197 * @see u_unescape() 3198 * @see u_unescapeAt() 3199 * @stable ICU 2.0 3200 */ 3201 UnicodeString unescape() const; 3202 3203 /** 3204 * Unescape a single escape sequence and return the represented 3205 * character. See unescape() for a listing of the recognized escape 3206 * sequences. The character at offset-1 is assumed (without 3207 * checking) to be a backslash. If the escape sequence is 3208 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is 3209 * returned. 3210 * 3211 * @param offset an input output parameter. On input, it is the 3212 * offset into this string where the escape sequence is located, 3213 * after the initial backslash. On output, it is advanced after the 3214 * last character parsed. On error, it is not advanced at all. 3215 * @return the character represented by the escape sequence at 3216 * offset, or (UChar32)0xFFFFFFFF on error. 3217 * @see UnicodeString#unescape() 3218 * @see u_unescape() 3219 * @see u_unescapeAt() 3220 * @stable ICU 2.0 3221 */ 3222 UChar32 unescapeAt(int32_t &offset) const; 3223 3224 /** 3225 * ICU "poor man's RTTI", returns a UClassID for this class. 3226 * 3227 * @stable ICU 2.2 3228 */ 3229 static UClassID U_EXPORT2 getStaticClassID(); 3230 3231 /** 3232 * ICU "poor man's RTTI", returns a UClassID for the actual class. 
3233 * 3234 * @stable ICU 2.2 3235 */ 3236 virtual UClassID getDynamicClassID() const; 3237 3238 //======================================== 3239 // Implementation methods 3240 //======================================== 3241 3242protected: 3243 /** 3244 * Implement Replaceable::getLength() (see jitterbug 1027). 3245 * @stable ICU 2.4 3246 */ 3247 virtual int32_t getLength() const; 3248 3249 /** 3250 * The change in Replaceable to use virtual getCharAt() allows 3251 * UnicodeString::charAt() to be inline again (see jitterbug 709). 3252 * @stable ICU 2.4 3253 */ 3254 virtual UChar getCharAt(int32_t offset) const; 3255 3256 /** 3257 * The change in Replaceable to use virtual getChar32At() allows 3258 * UnicodeString::char32At() to be inline again (see jitterbug 709). 3259 * @stable ICU 2.4 3260 */ 3261 virtual UChar32 getChar32At(int32_t offset) const; 3262 3263private: 3264 // For char* constructors. Could be made public. 3265 UnicodeString &setToUTF8(const StringPiece &utf8); 3266 // For extract(char*). 3267 // We could make a toUTF8(target, capacity, errorCode) public but not 3268 // this version: New API will be cleaner if we make callers create substrings 3269 // rather than having start+length on every method, 3270 // and it should take a UErrorCode&. 3271 int32_t 3272 toUTF8(int32_t start, int32_t len, 3273 char *target, int32_t capacity) const; 3274 3275 /** 3276 * Internal string contents comparison, called by operator==. 3277 * Requires: this & text not bogus and have same lengths. 
3278 */ 3279 UBool doEquals(const UnicodeString &text, int32_t len) const; 3280 3281 inline int8_t 3282 doCompare(int32_t start, 3283 int32_t length, 3284 const UnicodeString& srcText, 3285 int32_t srcStart, 3286 int32_t srcLength) const; 3287 3288 int8_t doCompare(int32_t start, 3289 int32_t length, 3290 const UChar *srcChars, 3291 int32_t srcStart, 3292 int32_t srcLength) const; 3293 3294 inline int8_t 3295 doCompareCodePointOrder(int32_t start, 3296 int32_t length, 3297 const UnicodeString& srcText, 3298 int32_t srcStart, 3299 int32_t srcLength) const; 3300 3301 int8_t doCompareCodePointOrder(int32_t start, 3302 int32_t length, 3303 const UChar *srcChars, 3304 int32_t srcStart, 3305 int32_t srcLength) const; 3306 3307 inline int8_t 3308 doCaseCompare(int32_t start, 3309 int32_t length, 3310 const UnicodeString &srcText, 3311 int32_t srcStart, 3312 int32_t srcLength, 3313 uint32_t options) const; 3314 3315 int8_t 3316 doCaseCompare(int32_t start, 3317 int32_t length, 3318 const UChar *srcChars, 3319 int32_t srcStart, 3320 int32_t srcLength, 3321 uint32_t options) const; 3322 3323 int32_t doIndexOf(UChar c, 3324 int32_t start, 3325 int32_t length) const; 3326 3327 int32_t doIndexOf(UChar32 c, 3328 int32_t start, 3329 int32_t length) const; 3330 3331 int32_t doLastIndexOf(UChar c, 3332 int32_t start, 3333 int32_t length) const; 3334 3335 int32_t doLastIndexOf(UChar32 c, 3336 int32_t start, 3337 int32_t length) const; 3338 3339 void doExtract(int32_t start, 3340 int32_t length, 3341 UChar *dst, 3342 int32_t dstStart) const; 3343 3344 inline void doExtract(int32_t start, 3345 int32_t length, 3346 UnicodeString& target) const; 3347 3348 inline UChar doCharAt(int32_t offset) const; 3349 3350 UnicodeString& doReplace(int32_t start, 3351 int32_t length, 3352 const UnicodeString& srcText, 3353 int32_t srcStart, 3354 int32_t srcLength); 3355 3356 UnicodeString& doReplace(int32_t start, 3357 int32_t length, 3358 const UChar *srcChars, 3359 int32_t srcStart, 3360 int32_t 
srcLength); 3361 3362 UnicodeString& doReverse(int32_t start, 3363 int32_t length); 3364 3365 // calculate hash code 3366 int32_t doHashCode(void) const; 3367 3368 // get pointer to start of array 3369 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3370 inline UChar* getArrayStart(void); 3371 inline const UChar* getArrayStart(void) const; 3372 3373 // A UnicodeString object (not necessarily its current buffer) 3374 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3375 inline UBool isWritable() const; 3376 3377 // Is the current buffer writable? 3378 inline UBool isBufferWritable() const; 3379 3380 // None of the following does releaseArray(). 3381 inline void setLength(int32_t len); // sets only fShortLength and fLength 3382 inline void setToEmpty(); // sets fFlags=kShortString 3383 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 3384 3385 // allocate the array; result may be fStackBuffer 3386 // sets refCount to 1 if appropriate 3387 // sets fArray, fCapacity, and fFlags 3388 // returns boolean for success or failure 3389 UBool allocate(int32_t capacity); 3390 3391 // release the array if owned 3392 void releaseArray(void); 3393 3394 // turn a bogus string into an empty one 3395 void unBogus(); 3396 3397 // implements assigment operator, copy constructor, and fastCopyFrom() 3398 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 3399 3400 // Pin start and limit to acceptable values. 3401 inline void pinIndex(int32_t& start) const; 3402 inline void pinIndices(int32_t& start, 3403 int32_t& length) const; 3404 3405#if !UCONFIG_NO_CONVERSION 3406 3407 /* Internal extract() using UConverter. */ 3408 int32_t doExtract(int32_t start, int32_t length, 3409 char *dest, int32_t destCapacity, 3410 UConverter *cnv, 3411 UErrorCode &errorCode) const; 3412 3413 /* 3414 * Real constructor for converting from codepage data. 
3415 * It assumes that it is called with !fRefCounted. 3416 * 3417 * If <code>codepage==0</code>, then the default converter 3418 * is used for the platform encoding. 3419 * If <code>codepage</code> is an empty string (<code>""</code>), 3420 * then a simple conversion is performed on the codepage-invariant 3421 * subset ("invariant characters") of the platform encoding. See utypes.h. 3422 */ 3423 void doCodepageCreate(const char *codepageData, 3424 int32_t dataLength, 3425 const char *codepage); 3426 3427 /* 3428 * Worker function for creating a UnicodeString from 3429 * a codepage string using a UConverter. 3430 */ 3431 void 3432 doCodepageCreate(const char *codepageData, 3433 int32_t dataLength, 3434 UConverter *converter, 3435 UErrorCode &status); 3436 3437#endif 3438 3439 /* 3440 * This function is called when write access to the array 3441 * is necessary. 3442 * 3443 * We need to make a copy of the array if 3444 * the buffer is read-only, or 3445 * the buffer is refCounted (shared), and refCount>1, or 3446 * the buffer is too small. 3447 * 3448 * Return FALSE if memory could not be allocated. 3449 */ 3450 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 3451 int32_t growCapacity = -1, 3452 UBool doCopyArray = TRUE, 3453 int32_t **pBufferToDelete = 0, 3454 UBool forceClone = FALSE); 3455 3456 /** 3457 * Common function for UnicodeString case mappings. 3458 * The stringCaseMapper has the same type UStringCaseMapper 3459 * as in ustr_imp.h for ustrcase_map(). 3460 */ 3461 UnicodeString & 3462 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 3463 3464 // ref counting 3465 void addRef(void); 3466 int32_t removeRef(void); 3467 int32_t refCount(void) const; 3468 3469 // constants 3470 enum { 3471 // Set the stack buffer size so that sizeof(UnicodeString) is, 3472 // naturally (without padding), a multiple of sizeof(pointer). 3473 US_STACKBUF_SIZE= sizeof(void *)==4 ? 
13 : 15, // Size of stack buffer for short strings 3474 kInvalidUChar=0xffff, // invalid UChar index 3475 kGrowSize=128, // grow size for this buffer 3476 kInvalidHashCode=0, // invalid hash code 3477 kEmptyHashCode=1, // hash code for empty string 3478 3479 // bit flag values for fFlags 3480 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 3481 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 3482 kRefCounted=4, // there is a refCount field before the characters in fArray 3483 kBufferIsReadonly=8,// do not write to this buffer 3484 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 3485 // and releaseBuffer(newLength) must be called 3486 3487 // combined values for convenience 3488 kShortString=kUsingStackBuffer, 3489 kLongString=kRefCounted, 3490 kReadonlyAlias=kBufferIsReadonly, 3491 kWritableAlias=0 3492 }; 3493 3494 friend class StringThreadTest; 3495 friend class UnicodeStringAppendable; 3496 3497 union StackBufferOrFields; // forward declaration necessary before friend declaration 3498 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 3499 3500 /* 3501 * The following are all the class fields that are stored 3502 * in each UnicodeString object. 3503 * Note that UnicodeString has virtual functions, 3504 * therefore there is an implicit vtable pointer 3505 * as the first real field. 3506 * The fields should be aligned such that no padding is necessary. 3507 * On 32-bit machines, the size should be 32 bytes, 3508 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 3509 * 3510 * We use a hack to achieve this. 
3511 * 3512 * With at least some compilers, each of the following is forced to 3513 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 3514 * rounded up with additional padding if the fields do not already fit that requirement: 3515 * - sizeof(class UnicodeString) 3516 * - offsetof(UnicodeString, fUnion) 3517 * - sizeof(fUnion) 3518 * - sizeof(fFields) 3519 * 3520 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 3521 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 3522 * (Padding at the end of fFields is ok: 3523 * As long as there is no padding after fStackBuffer, it is not wasted space.) 3524 * 3525 * We further assume that the compiler does not reorder the fields, 3526 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 3527 * with at most some padding (but no other field) in between. 3528 * (Padding there would be wasted space, but functionally harmless.) 3529 * 3530 * We use a few more sizeof(pointer)'s chunks of space with 3531 * fRestOfStackBuffer, fShortLength and fFlags, 3532 * to get up exactly to the intended sizeof(UnicodeString). 3533 */ 3534 // (implicit) *vtable; 3535 union StackBufferOrFields { 3536 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 3537 // else fFields is used 3538 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 3539 struct { 3540 UChar *fArray; // the Unicode data 3541 int32_t fCapacity; // capacity of fArray (in UChars) 3542 int32_t fLength; // number of characters in fArray if >127; else undefined 3543 } fFields; 3544 } fUnion; 3545 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 3546 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 3547 uint8_t fFlags; // bit flags: see constants above 3548}; 3549 3550/** 3551 * Create a new UnicodeString with the concatenation of two others. 
3552 * 3553 * @param s1 The first string to be copied to the new one. 3554 * @param s2 The second string to be copied to the new one, after s1. 3555 * @return UnicodeString(s1).append(s2) 3556 * @stable ICU 2.8 3557 */ 3558U_COMMON_API UnicodeString U_EXPORT2 3559operator+ (const UnicodeString &s1, const UnicodeString &s2); 3560 3561//======================================== 3562// Inline members 3563//======================================== 3564 3565//======================================== 3566// Privates 3567//======================================== 3568 3569inline void 3570UnicodeString::pinIndex(int32_t& start) const 3571{ 3572 // pin index 3573 if(start < 0) { 3574 start = 0; 3575 } else if(start > length()) { 3576 start = length(); 3577 } 3578} 3579 3580inline void 3581UnicodeString::pinIndices(int32_t& start, 3582 int32_t& _length) const 3583{ 3584 // pin indices 3585 int32_t len = length(); 3586 if(start < 0) { 3587 start = 0; 3588 } else if(start > len) { 3589 start = len; 3590 } 3591 if(_length < 0) { 3592 _length = 0; 3593 } else if(_length > (len - start)) { 3594 _length = (len - start); 3595 } 3596} 3597 3598inline UChar* 3599UnicodeString::getArrayStart() 3600{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 3601 3602inline const UChar* 3603UnicodeString::getArrayStart() const 3604{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 3605 3606//======================================== 3607// Read-only implementation methods 3608//======================================== 3609inline int32_t 3610UnicodeString::length() const 3611{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 3612 3613inline int32_t 3614UnicodeString::getCapacity() const 3615{ return (fFlags&kUsingStackBuffer) ? 
US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 3616 3617inline int32_t 3618UnicodeString::hashCode() const 3619{ return doHashCode(); } 3620 3621inline UBool 3622UnicodeString::isBogus() const 3623{ return (UBool)(fFlags & kIsBogus); } 3624 3625inline UBool 3626UnicodeString::isWritable() const 3627{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 3628 3629inline UBool 3630UnicodeString::isBufferWritable() const 3631{ 3632 return (UBool)( 3633 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3634 (!(fFlags&kRefCounted) || refCount()==1)); 3635} 3636 3637inline const UChar * 3638UnicodeString::getBuffer() const { 3639 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 3640 return 0; 3641 } else if(fFlags&kUsingStackBuffer) { 3642 return fUnion.fStackBuffer; 3643 } else { 3644 return fUnion.fFields.fArray; 3645 } 3646} 3647 3648//======================================== 3649// Read-only alias methods 3650//======================================== 3651inline int8_t 3652UnicodeString::doCompare(int32_t start, 3653 int32_t thisLength, 3654 const UnicodeString& srcText, 3655 int32_t srcStart, 3656 int32_t srcLength) const 3657{ 3658 if(srcText.isBogus()) { 3659 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3660 } else { 3661 srcText.pinIndices(srcStart, srcLength); 3662 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3663 } 3664} 3665 3666inline UBool 3667UnicodeString::operator== (const UnicodeString& text) const 3668{ 3669 if(isBogus()) { 3670 return text.isBogus(); 3671 } else { 3672 int32_t len = length(), textLength = text.length(); 3673 return !text.isBogus() && len == textLength && doEquals(text, len); 3674 } 3675} 3676 3677inline UBool 3678UnicodeString::operator!= (const UnicodeString& text) const 3679{ return (! 
operator==(text)); } 3680 3681inline UBool 3682UnicodeString::operator> (const UnicodeString& text) const 3683{ return doCompare(0, length(), text, 0, text.length()) == 1; } 3684 3685inline UBool 3686UnicodeString::operator< (const UnicodeString& text) const 3687{ return doCompare(0, length(), text, 0, text.length()) == -1; } 3688 3689inline UBool 3690UnicodeString::operator>= (const UnicodeString& text) const 3691{ return doCompare(0, length(), text, 0, text.length()) != -1; } 3692 3693inline UBool 3694UnicodeString::operator<= (const UnicodeString& text) const 3695{ return doCompare(0, length(), text, 0, text.length()) != 1; } 3696 3697inline int8_t 3698UnicodeString::compare(const UnicodeString& text) const 3699{ return doCompare(0, length(), text, 0, text.length()); } 3700 3701inline int8_t 3702UnicodeString::compare(int32_t start, 3703 int32_t _length, 3704 const UnicodeString& srcText) const 3705{ return doCompare(start, _length, srcText, 0, srcText.length()); } 3706 3707inline int8_t 3708UnicodeString::compare(const UChar *srcChars, 3709 int32_t srcLength) const 3710{ return doCompare(0, length(), srcChars, 0, srcLength); } 3711 3712inline int8_t 3713UnicodeString::compare(int32_t start, 3714 int32_t _length, 3715 const UnicodeString& srcText, 3716 int32_t srcStart, 3717 int32_t srcLength) const 3718{ return doCompare(start, _length, srcText, srcStart, srcLength); } 3719 3720inline int8_t 3721UnicodeString::compare(int32_t start, 3722 int32_t _length, 3723 const UChar *srcChars) const 3724{ return doCompare(start, _length, srcChars, 0, _length); } 3725 3726inline int8_t 3727UnicodeString::compare(int32_t start, 3728 int32_t _length, 3729 const UChar *srcChars, 3730 int32_t srcStart, 3731 int32_t srcLength) const 3732{ return doCompare(start, _length, srcChars, srcStart, srcLength); } 3733 3734inline int8_t 3735UnicodeString::compareBetween(int32_t start, 3736 int32_t limit, 3737 const UnicodeString& srcText, 3738 int32_t srcStart, 3739 int32_t srcLimit) 
const 3740{ return doCompare(start, limit - start, 3741 srcText, srcStart, srcLimit - srcStart); } 3742 3743inline int8_t 3744UnicodeString::doCompareCodePointOrder(int32_t start, 3745 int32_t thisLength, 3746 const UnicodeString& srcText, 3747 int32_t srcStart, 3748 int32_t srcLength) const 3749{ 3750 if(srcText.isBogus()) { 3751 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3752 } else { 3753 srcText.pinIndices(srcStart, srcLength); 3754 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3755 } 3756} 3757 3758inline int8_t 3759UnicodeString::compareCodePointOrder(const UnicodeString& text) const 3760{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 3761 3762inline int8_t 3763UnicodeString::compareCodePointOrder(int32_t start, 3764 int32_t _length, 3765 const UnicodeString& srcText) const 3766{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 3767 3768inline int8_t 3769UnicodeString::compareCodePointOrder(const UChar *srcChars, 3770 int32_t srcLength) const 3771{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 3772 3773inline int8_t 3774UnicodeString::compareCodePointOrder(int32_t start, 3775 int32_t _length, 3776 const UnicodeString& srcText, 3777 int32_t srcStart, 3778 int32_t srcLength) const 3779{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 3780 3781inline int8_t 3782UnicodeString::compareCodePointOrder(int32_t start, 3783 int32_t _length, 3784 const UChar *srcChars) const 3785{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 3786 3787inline int8_t 3788UnicodeString::compareCodePointOrder(int32_t start, 3789 int32_t _length, 3790 const UChar *srcChars, 3791 int32_t srcStart, 3792 int32_t srcLength) const 3793{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 3794 3795inline int8_t 
3796UnicodeString::compareCodePointOrderBetween(int32_t start, 3797 int32_t limit, 3798 const UnicodeString& srcText, 3799 int32_t srcStart, 3800 int32_t srcLimit) const 3801{ return doCompareCodePointOrder(start, limit - start, 3802 srcText, srcStart, srcLimit - srcStart); } 3803 3804inline int8_t 3805UnicodeString::doCaseCompare(int32_t start, 3806 int32_t thisLength, 3807 const UnicodeString &srcText, 3808 int32_t srcStart, 3809 int32_t srcLength, 3810 uint32_t options) const 3811{ 3812 if(srcText.isBogus()) { 3813 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3814 } else { 3815 srcText.pinIndices(srcStart, srcLength); 3816 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 3817 } 3818} 3819 3820inline int8_t 3821UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 3822 return doCaseCompare(0, length(), text, 0, text.length(), options); 3823} 3824 3825inline int8_t 3826UnicodeString::caseCompare(int32_t start, 3827 int32_t _length, 3828 const UnicodeString &srcText, 3829 uint32_t options) const { 3830 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 3831} 3832 3833inline int8_t 3834UnicodeString::caseCompare(const UChar *srcChars, 3835 int32_t srcLength, 3836 uint32_t options) const { 3837 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 3838} 3839 3840inline int8_t 3841UnicodeString::caseCompare(int32_t start, 3842 int32_t _length, 3843 const UnicodeString &srcText, 3844 int32_t srcStart, 3845 int32_t srcLength, 3846 uint32_t options) const { 3847 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 3848} 3849 3850inline int8_t 3851UnicodeString::caseCompare(int32_t start, 3852 int32_t _length, 3853 const UChar *srcChars, 3854 uint32_t options) const { 3855 return doCaseCompare(start, _length, srcChars, 0, _length, options); 3856} 3857 3858inline int8_t 3859UnicodeString::caseCompare(int32_t start, 3860 
int32_t _length, 3861 const UChar *srcChars, 3862 int32_t srcStart, 3863 int32_t srcLength, 3864 uint32_t options) const { 3865 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 3866} 3867 3868inline int8_t 3869UnicodeString::caseCompareBetween(int32_t start, 3870 int32_t limit, 3871 const UnicodeString &srcText, 3872 int32_t srcStart, 3873 int32_t srcLimit, 3874 uint32_t options) const { 3875 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 3876} 3877 3878inline int32_t 3879UnicodeString::indexOf(const UnicodeString& srcText, 3880 int32_t srcStart, 3881 int32_t srcLength, 3882 int32_t start, 3883 int32_t _length) const 3884{ 3885 if(!srcText.isBogus()) { 3886 srcText.pinIndices(srcStart, srcLength); 3887 if(srcLength > 0) { 3888 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 3889 } 3890 } 3891 return -1; 3892} 3893 3894inline int32_t 3895UnicodeString::indexOf(const UnicodeString& text) const 3896{ return indexOf(text, 0, text.length(), 0, length()); } 3897 3898inline int32_t 3899UnicodeString::indexOf(const UnicodeString& text, 3900 int32_t start) const { 3901 pinIndex(start); 3902 return indexOf(text, 0, text.length(), start, length() - start); 3903} 3904 3905inline int32_t 3906UnicodeString::indexOf(const UnicodeString& text, 3907 int32_t start, 3908 int32_t _length) const 3909{ return indexOf(text, 0, text.length(), start, _length); } 3910 3911inline int32_t 3912UnicodeString::indexOf(const UChar *srcChars, 3913 int32_t srcLength, 3914 int32_t start) const { 3915 pinIndex(start); 3916 return indexOf(srcChars, 0, srcLength, start, length() - start); 3917} 3918 3919inline int32_t 3920UnicodeString::indexOf(const UChar *srcChars, 3921 int32_t srcLength, 3922 int32_t start, 3923 int32_t _length) const 3924{ return indexOf(srcChars, 0, srcLength, start, _length); } 3925 3926inline int32_t 3927UnicodeString::indexOf(UChar c, 3928 int32_t start, 3929 int32_t 
_length) const 3930{ return doIndexOf(c, start, _length); } 3931 3932inline int32_t 3933UnicodeString::indexOf(UChar32 c, 3934 int32_t start, 3935 int32_t _length) const 3936{ return doIndexOf(c, start, _length); } 3937 3938inline int32_t 3939UnicodeString::indexOf(UChar c) const 3940{ return doIndexOf(c, 0, length()); } 3941 3942inline int32_t 3943UnicodeString::indexOf(UChar32 c) const 3944{ return indexOf(c, 0, length()); } 3945 3946inline int32_t 3947UnicodeString::indexOf(UChar c, 3948 int32_t start) const { 3949 pinIndex(start); 3950 return doIndexOf(c, start, length() - start); 3951} 3952 3953inline int32_t 3954UnicodeString::indexOf(UChar32 c, 3955 int32_t start) const { 3956 pinIndex(start); 3957 return indexOf(c, start, length() - start); 3958} 3959 3960inline int32_t 3961UnicodeString::lastIndexOf(const UChar *srcChars, 3962 int32_t srcLength, 3963 int32_t start, 3964 int32_t _length) const 3965{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } 3966 3967inline int32_t 3968UnicodeString::lastIndexOf(const UChar *srcChars, 3969 int32_t srcLength, 3970 int32_t start) const { 3971 pinIndex(start); 3972 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 3973} 3974 3975inline int32_t 3976UnicodeString::lastIndexOf(const UnicodeString& srcText, 3977 int32_t srcStart, 3978 int32_t srcLength, 3979 int32_t start, 3980 int32_t _length) const 3981{ 3982 if(!srcText.isBogus()) { 3983 srcText.pinIndices(srcStart, srcLength); 3984 if(srcLength > 0) { 3985 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 3986 } 3987 } 3988 return -1; 3989} 3990 3991inline int32_t 3992UnicodeString::lastIndexOf(const UnicodeString& text, 3993 int32_t start, 3994 int32_t _length) const 3995{ return lastIndexOf(text, 0, text.length(), start, _length); } 3996 3997inline int32_t 3998UnicodeString::lastIndexOf(const UnicodeString& text, 3999 int32_t start) const { 4000 pinIndex(start); 4001 return lastIndexOf(text, 0, 
text.length(), start, length() - start); 4002} 4003 4004inline int32_t 4005UnicodeString::lastIndexOf(const UnicodeString& text) const 4006{ return lastIndexOf(text, 0, text.length(), 0, length()); } 4007 4008inline int32_t 4009UnicodeString::lastIndexOf(UChar c, 4010 int32_t start, 4011 int32_t _length) const 4012{ return doLastIndexOf(c, start, _length); } 4013 4014inline int32_t 4015UnicodeString::lastIndexOf(UChar32 c, 4016 int32_t start, 4017 int32_t _length) const { 4018 return doLastIndexOf(c, start, _length); 4019} 4020 4021inline int32_t 4022UnicodeString::lastIndexOf(UChar c) const 4023{ return doLastIndexOf(c, 0, length()); } 4024 4025inline int32_t 4026UnicodeString::lastIndexOf(UChar32 c) const { 4027 return lastIndexOf(c, 0, length()); 4028} 4029 4030inline int32_t 4031UnicodeString::lastIndexOf(UChar c, 4032 int32_t start) const { 4033 pinIndex(start); 4034 return doLastIndexOf(c, start, length() - start); 4035} 4036 4037inline int32_t 4038UnicodeString::lastIndexOf(UChar32 c, 4039 int32_t start) const { 4040 pinIndex(start); 4041 return lastIndexOf(c, start, length() - start); 4042} 4043 4044inline UBool 4045UnicodeString::startsWith(const UnicodeString& text) const 4046{ return compare(0, text.length(), text, 0, text.length()) == 0; } 4047 4048inline UBool 4049UnicodeString::startsWith(const UnicodeString& srcText, 4050 int32_t srcStart, 4051 int32_t srcLength) const 4052{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4053 4054inline UBool 4055UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 4056 if(srcLength < 0) { 4057 srcLength = u_strlen(srcChars); 4058 } 4059 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4060} 4061 4062inline UBool 4063UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 4064 if(srcLength < 0) { 4065 srcLength = u_strlen(srcChars); 4066 } 4067 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4068} 4069 
4070inline UBool 4071UnicodeString::endsWith(const UnicodeString& text) const 4072{ return doCompare(length() - text.length(), text.length(), 4073 text, 0, text.length()) == 0; } 4074 4075inline UBool 4076UnicodeString::endsWith(const UnicodeString& srcText, 4077 int32_t srcStart, 4078 int32_t srcLength) const { 4079 srcText.pinIndices(srcStart, srcLength); 4080 return doCompare(length() - srcLength, srcLength, 4081 srcText, srcStart, srcLength) == 0; 4082} 4083 4084inline UBool 4085UnicodeString::endsWith(const UChar *srcChars, 4086 int32_t srcLength) const { 4087 if(srcLength < 0) { 4088 srcLength = u_strlen(srcChars); 4089 } 4090 return doCompare(length() - srcLength, srcLength, 4091 srcChars, 0, srcLength) == 0; 4092} 4093 4094inline UBool 4095UnicodeString::endsWith(const UChar *srcChars, 4096 int32_t srcStart, 4097 int32_t srcLength) const { 4098 if(srcLength < 0) { 4099 srcLength = u_strlen(srcChars + srcStart); 4100 } 4101 return doCompare(length() - srcLength, srcLength, 4102 srcChars, srcStart, srcLength) == 0; 4103} 4104 4105//======================================== 4106// replace 4107//======================================== 4108inline UnicodeString& 4109UnicodeString::replace(int32_t start, 4110 int32_t _length, 4111 const UnicodeString& srcText) 4112{ return doReplace(start, _length, srcText, 0, srcText.length()); } 4113 4114inline UnicodeString& 4115UnicodeString::replace(int32_t start, 4116 int32_t _length, 4117 const UnicodeString& srcText, 4118 int32_t srcStart, 4119 int32_t srcLength) 4120{ return doReplace(start, _length, srcText, srcStart, srcLength); } 4121 4122inline UnicodeString& 4123UnicodeString::replace(int32_t start, 4124 int32_t _length, 4125 const UChar *srcChars, 4126 int32_t srcLength) 4127{ return doReplace(start, _length, srcChars, 0, srcLength); } 4128 4129inline UnicodeString& 4130UnicodeString::replace(int32_t start, 4131 int32_t _length, 4132 const UChar *srcChars, 4133 int32_t srcStart, 4134 int32_t srcLength) 4135{ return 
doReplace(start, _length, srcChars, srcStart, srcLength); } 4136 4137inline UnicodeString& 4138UnicodeString::replace(int32_t start, 4139 int32_t _length, 4140 UChar srcChar) 4141{ return doReplace(start, _length, &srcChar, 0, 1); } 4142 4143inline UnicodeString& 4144UnicodeString::replaceBetween(int32_t start, 4145 int32_t limit, 4146 const UnicodeString& srcText) 4147{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4148 4149inline UnicodeString& 4150UnicodeString::replaceBetween(int32_t start, 4151 int32_t limit, 4152 const UnicodeString& srcText, 4153 int32_t srcStart, 4154 int32_t srcLimit) 4155{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4156 4157inline UnicodeString& 4158UnicodeString::findAndReplace(const UnicodeString& oldText, 4159 const UnicodeString& newText) 4160{ return findAndReplace(0, length(), oldText, 0, oldText.length(), 4161 newText, 0, newText.length()); } 4162 4163inline UnicodeString& 4164UnicodeString::findAndReplace(int32_t start, 4165 int32_t _length, 4166 const UnicodeString& oldText, 4167 const UnicodeString& newText) 4168{ return findAndReplace(start, _length, oldText, 0, oldText.length(), 4169 newText, 0, newText.length()); } 4170 4171// ============================ 4172// extract 4173// ============================ 4174inline void 4175UnicodeString::doExtract(int32_t start, 4176 int32_t _length, 4177 UnicodeString& target) const 4178{ target.replace(0, target.length(), *this, start, _length); } 4179 4180inline void 4181UnicodeString::extract(int32_t start, 4182 int32_t _length, 4183 UChar *target, 4184 int32_t targetStart) const 4185{ doExtract(start, _length, target, targetStart); } 4186 4187inline void 4188UnicodeString::extract(int32_t start, 4189 int32_t _length, 4190 UnicodeString& target) const 4191{ doExtract(start, _length, target); } 4192 4193#if !UCONFIG_NO_CONVERSION 4194 4195inline int32_t 4196UnicodeString::extract(int32_t start, 4197 int32_t _length, 4198 
char *dst, 4199 const char *codepage) const 4200 4201{ 4202 // This dstSize value will be checked explicitly 4203 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4204} 4205 4206#endif 4207 4208inline void 4209UnicodeString::extractBetween(int32_t start, 4210 int32_t limit, 4211 UChar *dst, 4212 int32_t dstStart) const { 4213 pinIndex(start); 4214 pinIndex(limit); 4215 doExtract(start, limit - start, dst, dstStart); 4216} 4217 4218inline UnicodeString 4219UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4220 return tempSubString(start, limit - start); 4221} 4222 4223inline UChar 4224UnicodeString::doCharAt(int32_t offset) const 4225{ 4226 if((uint32_t)offset < (uint32_t)length()) { 4227 return getArrayStart()[offset]; 4228 } else { 4229 return kInvalidUChar; 4230 } 4231} 4232 4233inline UChar 4234UnicodeString::charAt(int32_t offset) const 4235{ return doCharAt(offset); } 4236 4237inline UChar 4238UnicodeString::operator[] (int32_t offset) const 4239{ return doCharAt(offset); } 4240 4241inline UBool 4242UnicodeString::isEmpty() const { 4243 return fShortLength == 0; 4244} 4245 4246//======================================== 4247// Write implementation methods 4248//======================================== 4249inline void 4250UnicodeString::setLength(int32_t len) { 4251 if(len <= 127) { 4252 fShortLength = (int8_t)len; 4253 } else { 4254 fShortLength = (int8_t)-1; 4255 fUnion.fFields.fLength = len; 4256 } 4257} 4258 4259inline void 4260UnicodeString::setToEmpty() { 4261 fShortLength = 0; 4262 fFlags = kShortString; 4263} 4264 4265inline void 4266UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 4267 setLength(len); 4268 fUnion.fFields.fArray = array; 4269 fUnion.fFields.fCapacity = capacity; 4270} 4271 4272inline const UChar * 4273UnicodeString::getTerminatedBuffer() { 4274 if(!isWritable()) { 4275 return 0; 4276 } else { 4277 UChar *array = getArrayStart(); 4278 int32_t len = length(); 4279 if(len 
< getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 4280 /* 4281 * kRefCounted: Do not write the NUL if the buffer is shared. 4282 * That is mostly safe, except when the length of one copy was modified 4283 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 4284 * Then the NUL would be written into the middle of another copy's string. 4285 */ 4286 if(!(fFlags&kBufferIsReadonly)) { 4287 /* 4288 * We must not write to a readonly buffer, but it is known to be 4289 * NUL-terminated if len<capacity. 4290 * A shared, allocated buffer (refCount()>1) must not have its contents 4291 * modified, but the NUL at [len] is beyond the string contents, 4292 * and multiple string objects and threads writing the same NUL into the 4293 * same location is harmless. 4294 * In all other cases, the buffer is fully writable and it is anyway safe 4295 * to write the NUL. 4296 * 4297 * Note: An earlier version of this code tested whether there is a NUL 4298 * at [len] already, but, while safe, it generated lots of warnings from 4299 * tools like valgrind and Purify. 
4300 */ 4301 array[len] = 0; 4302 } 4303 return array; 4304 } else if(cloneArrayIfNeeded(len+1)) { 4305 array = getArrayStart(); 4306 array[len] = 0; 4307 return array; 4308 } else { 4309 return 0; 4310 } 4311 } 4312} 4313 4314inline UnicodeString& 4315UnicodeString::operator= (UChar ch) 4316{ return doReplace(0, length(), &ch, 0, 1); } 4317 4318inline UnicodeString& 4319UnicodeString::operator= (UChar32 ch) 4320{ return replace(0, length(), ch); } 4321 4322inline UnicodeString& 4323UnicodeString::setTo(const UnicodeString& srcText, 4324 int32_t srcStart, 4325 int32_t srcLength) 4326{ 4327 unBogus(); 4328 return doReplace(0, length(), srcText, srcStart, srcLength); 4329} 4330 4331inline UnicodeString& 4332UnicodeString::setTo(const UnicodeString& srcText, 4333 int32_t srcStart) 4334{ 4335 unBogus(); 4336 srcText.pinIndex(srcStart); 4337 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4338} 4339 4340inline UnicodeString& 4341UnicodeString::setTo(const UnicodeString& srcText) 4342{ 4343 return copyFrom(srcText); 4344} 4345 4346inline UnicodeString& 4347UnicodeString::setTo(const UChar *srcChars, 4348 int32_t srcLength) 4349{ 4350 unBogus(); 4351 return doReplace(0, length(), srcChars, 0, srcLength); 4352} 4353 4354inline UnicodeString& 4355UnicodeString::setTo(UChar srcChar) 4356{ 4357 unBogus(); 4358 return doReplace(0, length(), &srcChar, 0, 1); 4359} 4360 4361inline UnicodeString& 4362UnicodeString::setTo(UChar32 srcChar) 4363{ 4364 unBogus(); 4365 return replace(0, length(), srcChar); 4366} 4367 4368inline UnicodeString& 4369UnicodeString::append(const UnicodeString& srcText, 4370 int32_t srcStart, 4371 int32_t srcLength) 4372{ return doReplace(length(), 0, srcText, srcStart, srcLength); } 4373 4374inline UnicodeString& 4375UnicodeString::append(const UnicodeString& srcText) 4376{ return doReplace(length(), 0, srcText, 0, srcText.length()); } 4377 4378inline UnicodeString& 4379UnicodeString::append(const UChar *srcChars, 4380 
int32_t srcStart, 4381 int32_t srcLength) 4382{ return doReplace(length(), 0, srcChars, srcStart, srcLength); } 4383 4384inline UnicodeString& 4385UnicodeString::append(const UChar *srcChars, 4386 int32_t srcLength) 4387{ return doReplace(length(), 0, srcChars, 0, srcLength); } 4388 4389inline UnicodeString& 4390UnicodeString::append(UChar srcChar) 4391{ return doReplace(length(), 0, &srcChar, 0, 1); } 4392 4393inline UnicodeString& 4394UnicodeString::operator+= (UChar ch) 4395{ return doReplace(length(), 0, &ch, 0, 1); } 4396 4397inline UnicodeString& 4398UnicodeString::operator+= (UChar32 ch) { 4399 return append(ch); 4400} 4401 4402inline UnicodeString& 4403UnicodeString::operator+= (const UnicodeString& srcText) 4404{ return doReplace(length(), 0, srcText, 0, srcText.length()); } 4405 4406inline UnicodeString& 4407UnicodeString::insert(int32_t start, 4408 const UnicodeString& srcText, 4409 int32_t srcStart, 4410 int32_t srcLength) 4411{ return doReplace(start, 0, srcText, srcStart, srcLength); } 4412 4413inline UnicodeString& 4414UnicodeString::insert(int32_t start, 4415 const UnicodeString& srcText) 4416{ return doReplace(start, 0, srcText, 0, srcText.length()); } 4417 4418inline UnicodeString& 4419UnicodeString::insert(int32_t start, 4420 const UChar *srcChars, 4421 int32_t srcStart, 4422 int32_t srcLength) 4423{ return doReplace(start, 0, srcChars, srcStart, srcLength); } 4424 4425inline UnicodeString& 4426UnicodeString::insert(int32_t start, 4427 const UChar *srcChars, 4428 int32_t srcLength) 4429{ return doReplace(start, 0, srcChars, 0, srcLength); } 4430 4431inline UnicodeString& 4432UnicodeString::insert(int32_t start, 4433 UChar srcChar) 4434{ return doReplace(start, 0, &srcChar, 0, 1); } 4435 4436inline UnicodeString& 4437UnicodeString::insert(int32_t start, 4438 UChar32 srcChar) 4439{ return replace(start, 0, srcChar); } 4440 4441 4442inline UnicodeString& 4443UnicodeString::remove() 4444{ 4445 // remove() of a bogus string makes the string empty and 
non-bogus 4446 // we also un-alias a read-only alias to deal with NUL-termination 4447 // issues with getTerminatedBuffer() 4448 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 4449 setToEmpty(); 4450 } else { 4451 fShortLength = 0; 4452 } 4453 return *this; 4454} 4455 4456inline UnicodeString& 4457UnicodeString::remove(int32_t start, 4458 int32_t _length) 4459{ 4460 if(start <= 0 && _length == INT32_MAX) { 4461 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4462 return remove(); 4463 } 4464 return doReplace(start, _length, NULL, 0, 0); 4465} 4466 4467inline UnicodeString& 4468UnicodeString::removeBetween(int32_t start, 4469 int32_t limit) 4470{ return doReplace(start, limit - start, NULL, 0, 0); } 4471 4472inline UnicodeString & 4473UnicodeString::retainBetween(int32_t start, int32_t limit) { 4474 truncate(limit); 4475 return doReplace(0, start, NULL, 0, 0); 4476} 4477 4478inline UBool 4479UnicodeString::truncate(int32_t targetLength) 4480{ 4481 if(isBogus() && targetLength == 0) { 4482 // truncate(0) of a bogus string makes the string empty and non-bogus 4483 unBogus(); 4484 return FALSE; 4485 } else if((uint32_t)targetLength < (uint32_t)length()) { 4486 setLength(targetLength); 4487 if(fFlags&kBufferIsReadonly) { 4488 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 4489 } 4490 return TRUE; 4491 } else { 4492 return FALSE; 4493 } 4494} 4495 4496inline UnicodeString& 4497UnicodeString::reverse() 4498{ return doReverse(0, length()); } 4499 4500inline UnicodeString& 4501UnicodeString::reverse(int32_t start, 4502 int32_t _length) 4503{ return doReverse(start, _length); } 4504 4505U_NAMESPACE_END 4506 4507#endif 4508